diff --git a/src/core/fex/7z_C/7z.h b/src/core/fex/7z_C/7z.h index 1d2ba63f..b42405cc 100644 --- a/src/core/fex/7z_C/7z.h +++ b/src/core/fex/7z_C/7z.h @@ -1,200 +1,204 @@ -/* 7z.h -- 7z interface -2010-03-11 : Igor Pavlov : Public domain */ - -#ifndef __7Z_H -#define __7Z_H - -#include "7zBuf.h" - -EXTERN_C_BEGIN - -#define k7zStartHeaderSize 0x20 -#define k7zSignatureSize 6 -extern Byte k7zSignature[k7zSignatureSize]; -#define k7zMajorVersion 0 - -enum EIdEnum { - k7zIdEnd, - k7zIdHeader, - k7zIdArchiveProperties, - k7zIdAdditionalStreamsInfo, - k7zIdMainStreamsInfo, - k7zIdFilesInfo, - k7zIdPackInfo, - k7zIdUnpackInfo, - k7zIdSubStreamsInfo, - k7zIdSize, - k7zIdCRC, - k7zIdFolder, - k7zIdCodersUnpackSize, - k7zIdNumUnpackStream, - k7zIdEmptyStream, - k7zIdEmptyFile, - k7zIdAnti, - k7zIdName, - k7zIdCTime, - k7zIdATime, - k7zIdMTime, - k7zIdWinAttributes, - k7zIdComment, - k7zIdEncodedHeader, - k7zIdStartPos, - k7zIdDummy -}; - -typedef struct -{ - UInt32 NumInStreams; - UInt32 NumOutStreams; - UInt64 MethodID; - CBuf Props; -} CSzCoderInfo; - -void SzCoderInfo_Init(CSzCoderInfo* p); -void SzCoderInfo_Free(CSzCoderInfo* p, ISzAlloc* alloc); - -typedef struct -{ - UInt32 InIndex; - UInt32 OutIndex; -} CSzBindPair; - -typedef struct -{ - CSzCoderInfo* Coders; - CSzBindPair* BindPairs; - UInt32* PackStreams; - UInt64* UnpackSizes; - UInt32 NumCoders; - UInt32 NumBindPairs; - UInt32 NumPackStreams; - int UnpackCRCDefined; - UInt32 UnpackCRC; - - UInt32 NumUnpackStreams; -} CSzFolder; - -void SzFolder_Init(CSzFolder* p); -UInt64 SzFolder_GetUnpackSize(CSzFolder* p); -int SzFolder_FindBindPairForInStream(CSzFolder* p, UInt32 inStreamIndex); -UInt32 SzFolder_GetNumOutStreams(CSzFolder* p); -UInt64 SzFolder_GetUnpackSize(CSzFolder* p); - -SRes SzFolder_Decode(const CSzFolder* folder, const UInt64* packSizes, - ILookInStream* stream, UInt64 startPos, - Byte* outBuffer, size_t outSize, ISzAlloc* allocMain); - -typedef struct -{ - UInt32 Low; - UInt32 High; -} CNtfsFileTime; - -typedef struct -{ - CNtfsFileTime MTime; - UInt64 Size; - UInt32 Crc; - UInt32 Attrib; - Byte HasStream; - Byte IsDir; - Byte IsAnti; - Byte CrcDefined; - Byte MTimeDefined; - Byte AttribDefined; -} CSzFileItem; - -void SzFile_Init(CSzFileItem* p); - -typedef struct -{ - UInt64* PackSizes; - Byte* PackCRCsDefined; - UInt32* PackCRCs; - CSzFolder* Folders; - CSzFileItem* Files; - UInt32 NumPackStreams; - UInt32 NumFolders; - UInt32 NumFiles; -} CSzAr; - -void SzAr_Init(CSzAr* p); -void SzAr_Free(CSzAr* p, ISzAlloc* alloc); - -/* - SzExtract extracts file from archive - - *outBuffer must be 0 before first call for each new archive. - - Extracting cache: - If you need to decompress more than one file, you can send - these values from previous call: - *blockIndex, - *outBuffer, - *outBufferSize - You can consider "*outBuffer" as cache of solid block. If your archive is solid, - it will increase decompression speed. - - If you use external function, you can declare these 3 cache variables - (blockIndex, outBuffer, outBufferSize) as static in that external function. - - Free *outBuffer and set *outBuffer to 0, if you want to flush cache. 
-*/ - -typedef struct -{ - CSzAr db; - - UInt64 startPosAfterHeader; - UInt64 dataPos; - - UInt32* FolderStartPackStreamIndex; - UInt64* PackStreamStartPositions; - UInt32* FolderStartFileIndex; - UInt32* FileIndexToFolderIndexMap; - - size_t* FileNameOffsets; /* in 2-byte steps */ - CBuf FileNames; /* UTF-16-LE */ -} CSzArEx; - -void SzArEx_Init(CSzArEx* p); -void SzArEx_Free(CSzArEx* p, ISzAlloc* alloc); -UInt64 SzArEx_GetFolderStreamPos(const CSzArEx* p, UInt32 folderIndex, UInt32 indexInFolder); -int SzArEx_GetFolderFullPackSize(const CSzArEx* p, UInt32 folderIndex, UInt64* resSize); - -/* -if dest == NULL, the return value specifies the required size of the buffer, - in 16-bit characters, including the null-terminating character. -if dest != NULL, the return value specifies the number of 16-bit characters that - are written to the dest, including the null-terminating character. */ - -size_t SzArEx_GetFileNameUtf16(const CSzArEx* p, size_t fileIndex, UInt16* dest); - -SRes SzArEx_Extract( - const CSzArEx* db, - ILookInStream* inStream, - UInt32 fileIndex, /* index of file */ - UInt32* blockIndex, /* index of solid block */ - Byte** outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ - size_t* outBufferSize, /* buffer size for output buffer */ - size_t* offset, /* offset of stream for required file in *outBuffer */ - size_t* outSizeProcessed, /* size of file in *outBuffer */ - ISzAlloc* allocMain, - ISzAlloc* allocTemp); - -/* -SzArEx_Open Errors: -SZ_ERROR_NO_ARCHIVE -SZ_ERROR_ARCHIVE -SZ_ERROR_UNSUPPORTED -SZ_ERROR_MEM -SZ_ERROR_CRC -SZ_ERROR_INPUT_EOF -SZ_ERROR_FAIL -*/ - -SRes SzArEx_Open(CSzArEx* p, ILookInStream* inStream, ISzAlloc* allocMain, ISzAlloc* allocTemp); - -EXTERN_C_END - -#endif +/* 7z.h -- 7z interface +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_H +#define ZIP7_INC_7Z_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define k7zStartHeaderSize 0x20 +#define k7zSignatureSize 6 + +extern const Byte k7zSignature[k7zSignatureSize]; + +typedef struct +{ + const Byte *Data; + size_t Size; +} CSzData; + +/* CSzCoderInfo & CSzFolder support only default methods */ + +typedef struct +{ + size_t PropsOffset; + UInt32 MethodID; + Byte NumStreams; + Byte PropsSize; +} CSzCoderInfo; + +typedef struct +{ + UInt32 InIndex; + UInt32 OutIndex; +} CSzBond; + +#define SZ_NUM_CODERS_IN_FOLDER_MAX 4 +#define SZ_NUM_BONDS_IN_FOLDER_MAX 3 +#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4 + +typedef struct +{ + UInt32 NumCoders; + UInt32 NumBonds; + UInt32 NumPackStreams; + UInt32 UnpackStream; + UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX]; + CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX]; + CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX]; +} CSzFolder; + + +SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd); + +typedef struct +{ + UInt32 Low; + UInt32 High; +} CNtfsFileTime; + +typedef struct +{ + Byte *Defs; /* MSB 0 bit numbering */ + UInt32 *Vals; +} CSzBitUi32s; + +typedef struct +{ + Byte *Defs; /* MSB 0 bit numbering */ + // UInt64 *Vals; + CNtfsFileTime *Vals; +} CSzBitUi64s; + +#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0) + +#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 >> ((i) & 7))) != 0) + +typedef struct +{ + UInt32 NumPackStreams; + UInt32 NumFolders; + + UInt64 *PackPositions; // NumPackStreams + 1 + CSzBitUi32s FolderCRCs; // NumFolders + + size_t *FoCodersOffsets; // NumFolders + 1 + UInt32 *FoStartPackStreamIndex; // NumFolders + 1 + UInt32 *FoToCoderUnpackSizes; 
// NumFolders + 1 + Byte *FoToMainUnpackSizeIndex; // NumFolders + UInt64 *CoderUnpackSizes; // for all coders in all folders + + Byte *CodersData; + + UInt64 RangeLimit; +} CSzAr; + +UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); + +SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, + ILookInStreamPtr stream, UInt64 startPos, + Byte *outBuffer, size_t outSize, + ISzAllocPtr allocMain); + +typedef struct +{ + CSzAr db; + + UInt64 startPosAfterHeader; + UInt64 dataPos; + + UInt32 NumFiles; + + UInt64 *UnpackPositions; // NumFiles + 1 + // Byte *IsEmptyFiles; + Byte *IsDirs; + CSzBitUi32s CRCs; + + CSzBitUi32s Attribs; + // CSzBitUi32s Parents; + CSzBitUi64s MTime; + CSzBitUi64s CTime; + + UInt32 *FolderToFile; // NumFolders + 1 + UInt32 *FileToFolder; // NumFiles + + size_t *FileNameOffsets; /* in 2-byte steps */ + Byte *FileNames; /* UTF-16-LE */ +} CSzArEx; + +#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i)) + +#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i]) + +void SzArEx_Init(CSzArEx *p); +void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc); +UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder); +int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize); + +/* +if dest == NULL, the return value specifies the required size of the buffer, + in 16-bit characters, including the null-terminating character. +if dest != NULL, the return value specifies the number of 16-bit characters that + are written to the dest, including the null-terminating character. */ + +size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest); + +/* +size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex); +UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest); +*/ + + + +/* + SzArEx_Extract extracts file from archive + + *outBuffer must be 0 before first call for each new archive. + + Extracting cache: + If you need to decompress more than one file, you can send + these values from previous call: + *blockIndex, + *outBuffer, + *outBufferSize + You can consider "*outBuffer" as cache of solid block. If your archive is solid, + it will increase decompression speed. + + If you use external function, you can declare these 3 cache variables + (blockIndex, outBuffer, outBufferSize) as static in that external function. + + Free *outBuffer and set *outBuffer to 0, if you want to flush cache. 
+*/ + +SRes SzArEx_Extract( + const CSzArEx *db, + ILookInStreamPtr inStream, + UInt32 fileIndex, /* index of file */ + UInt32 *blockIndex, /* index of solid block */ + Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ + size_t *outBufferSize, /* buffer size for output buffer */ + size_t *offset, /* offset of stream for required file in *outBuffer */ + size_t *outSizeProcessed, /* size of file in *outBuffer */ + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp); + + +/* +SzArEx_Open Errors: +SZ_ERROR_NO_ARCHIVE +SZ_ERROR_ARCHIVE +SZ_ERROR_UNSUPPORTED +SZ_ERROR_MEM +SZ_ERROR_CRC +SZ_ERROR_INPUT_EOF +SZ_ERROR_FAIL +*/ + +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, + ISzAllocPtr allocMain, ISzAllocPtr allocTemp); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/7zAlloc.c b/src/core/fex/7z_C/7zAlloc.c index bff20f9f..3a8e2cbf 100644 --- a/src/core/fex/7z_C/7zAlloc.c +++ b/src/core/fex/7z_C/7zAlloc.c @@ -1,74 +1,89 @@ -/* 7zAlloc.c -- Allocation functions -2010-10-29 : Igor Pavlov : Public domain */ - -#include "7zAlloc.h" - -/* #define _SZ_ALLOC_DEBUG */ -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ - -#ifdef _SZ_ALLOC_DEBUG - -#ifdef _WIN32 -#include -#endif - -#include -int g_allocCount = 0; -int g_allocCountTemp = 0; - -#endif - -void* SzAlloc(void* p, size_t size) -{ - (void)p; // unused param - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc %10d bytes; count = %10d", size, g_allocCount); - g_allocCount++; -#endif - return malloc(size); -} - -void SzFree(void* p, void* address) -{ - (void)p; // unused param -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) { - g_allocCount--; - fprintf(stderr, "\nFree; count = %10d", g_allocCount); - } -#endif - free(address); -} - -void* SzAllocTemp(void* p, size_t size) -{ - (void)p; // unused param - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_temp %10d bytes; count = %10d", size, g_allocCountTemp); - g_allocCountTemp++; -#ifdef _WIN32 - return HeapAlloc(GetProcessHeap(), 0, size); -#endif -#endif - return malloc(size); -} - -void SzFreeTemp(void* p, void* address) -{ - (void)p; // unused param -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) { - g_allocCountTemp--; - fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp); - } -#ifdef _WIN32 - HeapFree(GetProcessHeap(), 0, address); - return; -#endif -#endif - free(address); -} +/* 7zAlloc.c -- Allocation functions for 7z processing +2023-03-04 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zAlloc.h" + +/* #define SZ_ALLOC_DEBUG */ +/* use SZ_ALLOC_DEBUG to debug alloc/free operations */ + +#ifdef SZ_ALLOC_DEBUG + +/* +#ifdef _WIN32 +#include "7zWindows.h" +#endif +*/ + +#include +static int g_allocCount = 0; +static int g_allocCountTemp = 0; + +static void Print_Alloc(const char *s, size_t size, int *counter) +{ + const unsigned size2 = (unsigned)size; + fprintf(stderr, "\n%s count = %10d : %10u bytes; ", s, *counter, size2); + (*counter)++; +} +static void Print_Free(const char *s, int *counter) +{ + (*counter)--; + fprintf(stderr, "\n%s count = %10d", s, *counter); +} +#endif + +void *SzAlloc(ISzAllocPtr p, size_t size) +{ + UNUSED_VAR(p) + if (size == 0) + return 0; + #ifdef SZ_ALLOC_DEBUG + Print_Alloc("Alloc", size, &g_allocCount); + #endif + return malloc(size); +} + +void SzFree(ISzAllocPtr p, void *address) +{ + UNUSED_VAR(p) + #ifdef SZ_ALLOC_DEBUG + if (address) + Print_Free("Free ", &g_allocCount); + #endif + free(address); +} + 
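The SzAlloc/SzFree pair above (and the SzAllocTemp/SzFreeTemp pair that follows) are the functions a caller normally plugs into the two ISzAlloc tables taken by the CSzArEx API declared in 7z.h earlier in this patch. A minimal driver sketch under that assumption: it uses only declarations visible in this patch plus ISzAlloc/ILookInStreamPtr from 7zTypes.h (not shown here), and the ListAndExtractAll name and the already-constructed ILookInStreamPtr argument are illustrative — the reference SDK builds that stream from CFileInStream/CLookToRead2, which this diff does not touch.

#include "7z.h"
#include "7zAlloc.h"
#include "7zCrc.h"

/* Field order (Alloc, Free) follows ISzAlloc in 7zTypes.h, outside this patch. */
static const ISzAlloc g_AllocMain = { SzAlloc, SzFree };
static const ISzAlloc g_AllocTemp = { SzAllocTemp, SzFreeTemp };

/* Illustrative driver: walks all entries and decompresses every regular file,
   reusing the solid-block cache described in the 7z.h comment above. */
static SRes ListAndExtractAll(ILookInStreamPtr stream)
{
  CSzArEx db;
  SRes res;
  UInt32 i;
  UInt32 blockIndex = 0xFFFFFFFF;  /* any value is fine before the first call */
  Byte *outBuffer = NULL;          /* must be NULL before the first call */
  size_t outBufferSize = 0;

  CrcGenerateTable();              /* CRC tables must exist before SzArEx_Open */
  SzArEx_Init(&db);
  res = SzArEx_Open(&db, stream, &g_AllocMain, &g_AllocTemp);
  if (res != SZ_OK)
    return res;

  for (i = 0; i < db.NumFiles; i++)
  {
    size_t offset = 0, outSizeProcessed = 0;
    /* two-pass name query: with dest == NULL the call returns the length
       in 16-bit units, including the terminating zero */
    const size_t nameLen = SzArEx_GetFileNameUtf16(&db, i, NULL);
    (void)nameLen;  /* a real caller allocates nameLen UInt16s and calls again */

    if (SzArEx_IsDir(&db, i))
      continue;
    res = SzArEx_Extract(&db, stream, i,
        &blockIndex, &outBuffer, &outBufferSize,
        &offset, &outSizeProcessed,
        &g_AllocMain, &g_AllocTemp);
    if (res != SZ_OK)
      break;
    /* the file's bytes are at outBuffer + offset, outSizeProcessed bytes long */
  }

  ISzAlloc_Free(&g_AllocMain, outBuffer);  /* flush the solid-block cache */
  SzArEx_Free(&db, &g_AllocMain);
  return res;
}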
+void *SzAllocTemp(ISzAllocPtr p, size_t size) +{ + UNUSED_VAR(p) + if (size == 0) + return 0; + #ifdef SZ_ALLOC_DEBUG + Print_Alloc("Alloc_temp", size, &g_allocCountTemp); + /* + #ifdef _WIN32 + return HeapAlloc(GetProcessHeap(), 0, size); + #endif + */ + #endif + return malloc(size); +} + +void SzFreeTemp(ISzAllocPtr p, void *address) +{ + UNUSED_VAR(p) + #ifdef SZ_ALLOC_DEBUG + if (address) + Print_Free("Free_temp ", &g_allocCountTemp); + /* + #ifdef _WIN32 + HeapFree(GetProcessHeap(), 0, address); + return; + #endif + */ + #endif + free(address); +} diff --git a/src/core/fex/7z_C/7zAlloc.h b/src/core/fex/7z_C/7zAlloc.h index 30690401..bedd125c 100644 --- a/src/core/fex/7z_C/7zAlloc.h +++ b/src/core/fex/7z_C/7zAlloc.h @@ -1,15 +1,19 @@ -/* 7zAlloc.h -- Allocation functions -2010-10-29 : Igor Pavlov : Public domain */ - -#ifndef __7Z_ALLOC_H -#define __7Z_ALLOC_H - -#include - -void* SzAlloc(void* p, size_t size); -void SzFree(void* p, void* address); - -void* SzAllocTemp(void* p, size_t size); -void SzFreeTemp(void* p, void* address); - -#endif +/* 7zAlloc.h -- Allocation functions +2023-03-04 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_ALLOC_H +#define ZIP7_INC_7Z_ALLOC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void *SzAlloc(ISzAllocPtr p, size_t size); +void SzFree(ISzAllocPtr p, void *address); + +void *SzAllocTemp(ISzAllocPtr p, size_t size); +void SzFreeTemp(ISzAllocPtr p, void *address); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/7zArcIn.c b/src/core/fex/7z_C/7zArcIn.c new file mode 100644 index 00000000..84a1c08d --- /dev/null +++ b/src/core/fex/7z_C/7zArcIn.c @@ -0,0 +1,1786 @@ +/* 7zArcIn.c -- 7z Input functions +2023-09-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7z.h" +#include "7zBuf.h" +#include "7zCrc.h" +#include "CpuArch.h" + +#define MY_ALLOC(T, p, size, alloc) \ + { if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; } + +#define MY_ALLOC_ZE(T, p, size, alloc) \ + { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) } + +#define MY_ALLOC_AND_CPY(to, size, from, alloc) \ + { MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); } + +#define MY_ALLOC_ZE_AND_CPY(to, size, from, alloc) \ + { if ((size) == 0) to = NULL; else { MY_ALLOC_AND_CPY(to, size, from, alloc) } } + +#define k7zMajorVersion 0 + +enum EIdEnum +{ + k7zIdEnd, + k7zIdHeader, + k7zIdArchiveProperties, + k7zIdAdditionalStreamsInfo, + k7zIdMainStreamsInfo, + k7zIdFilesInfo, + k7zIdPackInfo, + k7zIdUnpackInfo, + k7zIdSubStreamsInfo, + k7zIdSize, + k7zIdCRC, + k7zIdFolder, + k7zIdCodersUnpackSize, + k7zIdNumUnpackStream, + k7zIdEmptyStream, + k7zIdEmptyFile, + k7zIdAnti, + k7zIdName, + k7zIdCTime, + k7zIdATime, + k7zIdMTime, + k7zIdWinAttrib, + k7zIdComment, + k7zIdEncodedHeader, + k7zIdStartPos, + k7zIdDummy + // k7zNtSecure, + // k7zParent, + // k7zIsReal +}; + +const Byte k7zSignature[k7zSignatureSize] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + +#define SzBitUi32s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; } + +static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) +{ + if (num == 0) + { + p->Defs = NULL; + p->Vals = NULL; + } + else + { + MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc) + MY_ALLOC(UInt32, p->Vals, num, alloc) + } + return SZ_OK; +} + +static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; + ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; +} + +#define SzBitUi64s_INIT(p) { (p)->Defs = NULL; 
(p)->Vals = NULL; } + +static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; + ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; +} + + +static void SzAr_Init(CSzAr *p) +{ + p->NumPackStreams = 0; + p->NumFolders = 0; + + p->PackPositions = NULL; + SzBitUi32s_INIT(&p->FolderCRCs) + + p->FoCodersOffsets = NULL; + p->FoStartPackStreamIndex = NULL; + p->FoToCoderUnpackSizes = NULL; + p->FoToMainUnpackSizeIndex = NULL; + p->CoderUnpackSizes = NULL; + + p->CodersData = NULL; + + p->RangeLimit = 0; +} + +static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->PackPositions); + SzBitUi32s_Free(&p->FolderCRCs, alloc); + + ISzAlloc_Free(alloc, p->FoCodersOffsets); + ISzAlloc_Free(alloc, p->FoStartPackStreamIndex); + ISzAlloc_Free(alloc, p->FoToCoderUnpackSizes); + ISzAlloc_Free(alloc, p->FoToMainUnpackSizeIndex); + ISzAlloc_Free(alloc, p->CoderUnpackSizes); + + ISzAlloc_Free(alloc, p->CodersData); + + SzAr_Init(p); +} + + +void SzArEx_Init(CSzArEx *p) +{ + SzAr_Init(&p->db); + + p->NumFiles = 0; + p->dataPos = 0; + + p->UnpackPositions = NULL; + p->IsDirs = NULL; + + p->FolderToFile = NULL; + p->FileToFolder = NULL; + + p->FileNameOffsets = NULL; + p->FileNames = NULL; + + SzBitUi32s_INIT(&p->CRCs) + SzBitUi32s_INIT(&p->Attribs) + // SzBitUi32s_INIT(&p->Parents) + SzBitUi64s_INIT(&p->MTime) + SzBitUi64s_INIT(&p->CTime) +} + +void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->UnpackPositions); + ISzAlloc_Free(alloc, p->IsDirs); + + ISzAlloc_Free(alloc, p->FolderToFile); + ISzAlloc_Free(alloc, p->FileToFolder); + + ISzAlloc_Free(alloc, p->FileNameOffsets); + ISzAlloc_Free(alloc, p->FileNames); + + SzBitUi32s_Free(&p->CRCs, alloc); + SzBitUi32s_Free(&p->Attribs, alloc); + // SzBitUi32s_Free(&p->Parents, alloc); + SzBitUi64s_Free(&p->MTime, alloc); + SzBitUi64s_Free(&p->CTime, alloc); + + SzAr_Free(&p->db, alloc); + SzArEx_Init(p); +} + + +static int TestSignatureCandidate(const Byte *testBytes) +{ + unsigned i; + for (i = 0; i < k7zSignatureSize; i++) + if (testBytes[i] != k7zSignature[i]) + return 0; + return 1; +} + +#define SzData_CLEAR(p) { (p)->Data = NULL; (p)->Size = 0; } + +#define SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) \ + (_sd_)->Size--; dest = *(_sd_)->Data++; + +#define SZ_READ_BYTE_SD(_sd_, dest) \ + if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; \ + SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) + +#define SZ_READ_BYTE(dest) SZ_READ_BYTE_SD(sd, dest) + +#define SZ_READ_BYTE_2(dest) \ + if (sd.Size == 0) return SZ_ERROR_ARCHIVE; \ + sd.Size--; dest = *sd.Data++; + +#define SKIP_DATA(sd, size) { sd->Size -= (size_t)(size); sd->Data += (size_t)(size); } +#define SKIP_DATA2(sd, size) { sd.Size -= (size_t)(size); sd.Data += (size_t)(size); } + +#define SZ_READ_32(dest) if (sd.Size < 4) return SZ_ERROR_ARCHIVE; \ + dest = GetUi32(sd.Data); SKIP_DATA2(sd, 4); + +static Z7_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value) +{ + Byte firstByte, mask; + unsigned i; + UInt32 v; + + SZ_READ_BYTE(firstByte) + if ((firstByte & 0x80) == 0) + { + *value = firstByte; + return SZ_OK; + } + SZ_READ_BYTE(v) + if ((firstByte & 0x40) == 0) + { + *value = (((UInt32)firstByte & 0x3F) << 8) | v; + return SZ_OK; + } + SZ_READ_BYTE(mask) + *value = v | ((UInt32)mask << 8); + mask = 0x20; + for (i = 2; i < 8; i++) + { + Byte b; + if ((firstByte & mask) == 0) + { + const UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1); + *value |= (highPart << (8 * i)); + return SZ_OK; + } + SZ_READ_BYTE(b) + *value |= 
((UInt64)b << (8 * i)); + mask >>= 1; + } + return SZ_OK; +} + + +static Z7_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value) +{ + Byte firstByte; + UInt64 value64; + if (sd->Size == 0) + return SZ_ERROR_ARCHIVE; + firstByte = *sd->Data; + if ((firstByte & 0x80) == 0) + { + *value = firstByte; + sd->Data++; + sd->Size--; + return SZ_OK; + } + RINOK(ReadNumber(sd, &value64)) + if (value64 >= (UInt32)0x80000000 - 1) + return SZ_ERROR_UNSUPPORTED; + if (value64 >= ((UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 4))) + return SZ_ERROR_UNSUPPORTED; + *value = (UInt32)value64; + return SZ_OK; +} + +#define ReadID(sd, value) ReadNumber(sd, value) + +static SRes SkipData(CSzData *sd) +{ + UInt64 size; + RINOK(ReadNumber(sd, &size)) + if (size > sd->Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA(sd, size) + return SZ_OK; +} + +static SRes WaitId(CSzData *sd, UInt32 id) +{ + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == id) + return SZ_OK; + if (type == k7zIdEnd) + return SZ_ERROR_ARCHIVE; + RINOK(SkipData(sd)) + } +} + +static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v) +{ + const UInt32 numBytes = (numItems + 7) >> 3; + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + *v = sd->Data; + SKIP_DATA(sd, numBytes) + return SZ_OK; +} + +static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems) +{ + unsigned b = 0; + unsigned m = 0; + UInt32 sum = 0; + for (; numItems != 0; numItems--) + { + if (m == 0) + { + b = *bits++; + m = 8; + } + m--; + sum += (UInt32)((b >> m) & 1); + } + return sum; +} + +static Z7_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc) +{ + Byte allAreDefined; + Byte *v2; + const UInt32 numBytes = (numItems + 7) >> 3; + *v = NULL; + SZ_READ_BYTE(allAreDefined) + if (numBytes == 0) + return SZ_OK; + if (allAreDefined == 0) + { + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc) + SKIP_DATA(sd, numBytes) + return SZ_OK; + } + MY_ALLOC(Byte, *v, numBytes, alloc) + v2 = *v; + memset(v2, 0xFF, (size_t)numBytes); + { + const unsigned numBits = (unsigned)numItems & 7; + if (numBits != 0) + v2[(size_t)numBytes - 1] = (Byte)((((UInt32)1 << numBits) - 1) << (8 - numBits)); + } + return SZ_OK; +} + +static Z7_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) +{ + UInt32 i; + CSzData sd; + UInt32 *vals; + const Byte *defs; + MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc) + sd = *sd2; + defs = crcs->Defs; + vals = crcs->Vals; + for (i = 0; i < numItems; i++) + if (SzBitArray_Check(defs, i)) + { + SZ_READ_32(vals[i]) + } + else + vals[i] = 0; + *sd2 = sd; + return SZ_OK; +} + +static SRes ReadBitUi32s(CSzData *sd, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) +{ + SzBitUi32s_Free(crcs, alloc); + RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc)) + return ReadUi32s(sd, numItems, crcs, alloc); +} + +static SRes SkipBitUi32s(CSzData *sd, UInt32 numItems) +{ + Byte allAreDefined; + UInt32 numDefined = numItems; + SZ_READ_BYTE(allAreDefined) + if (!allAreDefined) + { + const size_t numBytes = (numItems + 7) >> 3; + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + numDefined = CountDefinedBits(sd->Data, numItems); + SKIP_DATA(sd, numBytes) + } + if (numDefined > (sd->Size >> 2)) + return SZ_ERROR_ARCHIVE; + SKIP_DATA(sd, (size_t)numDefined * 4) + return SZ_OK; +} + +static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc) +{ + RINOK(SzReadNumber32(sd, &p->NumPackStreams)) + + 
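ReadNumber and SzReadNumber32 above implement 7z's variable-length integer format, which every header record parsed here (pack sizes, counts, property IDs) is built from: the count of leading 1 bits in the first byte gives the number of extra bytes, the extra bytes are stored least-significant first, and the remaining low bits of the first byte become the most significant bits of the value. A standalone decoder sketch of the same scheme (illustrative name, no bounds checks, unlike the real ReadNumber):

#include <stddef.h>
#include <stdint.h>

/* Decodes one 7z variable-length number starting at p and reports how many
   input bytes it consumed. */
static uint64_t Read7zNumber(const unsigned char *p, size_t *bytesUsed)
{
  const unsigned char first = *p++;
  unsigned char mask = 0x80;
  unsigned numExtra = 0;
  uint64_t value = 0;
  unsigned i;

  while (numExtra < 8 && (first & mask) != 0)  /* leading 1 bits = extra bytes */
  {
    numExtra++;
    mask >>= 1;
  }
  for (i = 0; i < numExtra; i++)               /* extra bytes, least significant first */
    value |= (uint64_t)p[i] << (8 * i);
  if (numExtra < 8)                            /* leftover bits of the first byte go on top */
    value |= (uint64_t)(first & (mask - 1)) << (8 * numExtra);
  *bytesUsed = 1 + numExtra;
  return value;
}

/* Examples: {0x7F} -> 0x7F (1 byte); {0x83,0xE8} -> 1000 (2 bytes);
   {0xC1,0x45,0x23} -> 0x12345 (3 bytes). */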
RINOK(WaitId(sd, k7zIdSize)) + MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc) + { + UInt64 sum = 0; + UInt32 i; + const UInt32 numPackStreams = p->NumPackStreams; + for (i = 0; i < numPackStreams; i++) + { + UInt64 packSize; + p->PackPositions[i] = sum; + RINOK(ReadNumber(sd, &packSize)) + sum += packSize; + if (sum < packSize) + return SZ_ERROR_ARCHIVE; + } + p->PackPositions[i] = sum; + } + + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + return SZ_OK; + if (type == k7zIdCRC) + { + /* CRC of packed streams is unused now */ + RINOK(SkipBitUi32s(sd, p->NumPackStreams)) + continue; + } + RINOK(SkipData(sd)) + } +} + +/* +static SRes SzReadSwitch(CSzData *sd) +{ + Byte external; + RINOK(SzReadByte(sd, &external)); + return (external == 0) ? SZ_OK: SZ_ERROR_UNSUPPORTED; +} +*/ + +#define k_NumCodersStreams_in_Folder_MAX (SZ_NUM_BONDS_IN_FOLDER_MAX + SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX) + +SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) +{ + UInt32 numCoders, i; + UInt32 numInStreams = 0; + const Byte *dataStart = sd->Data; + + f->NumCoders = 0; + f->NumBonds = 0; + f->NumPackStreams = 0; + f->UnpackStream = 0; + + RINOK(SzReadNumber32(sd, &numCoders)) + if (numCoders == 0 || numCoders > SZ_NUM_CODERS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (i = 0; i < numCoders; i++) + { + Byte mainByte; + CSzCoderInfo *coder = f->Coders + i; + unsigned idSize, j; + UInt64 id; + + SZ_READ_BYTE(mainByte) + if ((mainByte & 0xC0) != 0) + return SZ_ERROR_UNSUPPORTED; + + idSize = (unsigned)(mainByte & 0xF); + if (idSize > sizeof(id)) + return SZ_ERROR_UNSUPPORTED; + if (idSize > sd->Size) + return SZ_ERROR_ARCHIVE; + id = 0; + for (j = 0; j < idSize; j++) + { + id = ((id << 8) | *sd->Data); + sd->Data++; + sd->Size--; + } + if (id > (UInt32)0xFFFFFFFF) + return SZ_ERROR_UNSUPPORTED; + coder->MethodID = (UInt32)id; + + coder->NumStreams = 1; + coder->PropsOffset = 0; + coder->PropsSize = 0; + + if ((mainByte & 0x10) != 0) + { + UInt32 numStreams; + + RINOK(SzReadNumber32(sd, &numStreams)) + if (numStreams > k_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + coder->NumStreams = (Byte)numStreams; + + RINOK(SzReadNumber32(sd, &numStreams)) + if (numStreams != 1) + return SZ_ERROR_UNSUPPORTED; + } + + numInStreams += coder->NumStreams; + + if (numInStreams > k_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + + if ((mainByte & 0x20) != 0) + { + UInt32 propsSize = 0; + RINOK(SzReadNumber32(sd, &propsSize)) + if (propsSize > sd->Size) + return SZ_ERROR_ARCHIVE; + if (propsSize >= 0x80) + return SZ_ERROR_UNSUPPORTED; + coder->PropsOffset = (size_t)(sd->Data - dataStart); + coder->PropsSize = (Byte)propsSize; + sd->Data += (size_t)propsSize; + sd->Size -= (size_t)propsSize; + } + } + + /* + if (numInStreams == 1 && numCoders == 1) + { + f->NumPackStreams = 1; + f->PackStreams[0] = 0; + } + else + */ + { + Byte streamUsed[k_NumCodersStreams_in_Folder_MAX]; + UInt32 numBonds, numPackStreams; + + numBonds = numCoders - 1; + if (numInStreams < numBonds) + return SZ_ERROR_ARCHIVE; + if (numBonds > SZ_NUM_BONDS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + f->NumBonds = numBonds; + + numPackStreams = numInStreams - numBonds; + if (numPackStreams > SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + f->NumPackStreams = numPackStreams; + + for (i = 0; i < numInStreams; i++) + streamUsed[i] = False; + + if (numBonds != 0) + { + Byte coderUsed[SZ_NUM_CODERS_IN_FOLDER_MAX]; + + for (i = 0; i < numCoders; i++) 
+ coderUsed[i] = False; + + for (i = 0; i < numBonds; i++) + { + CSzBond *bp = f->Bonds + i; + + RINOK(SzReadNumber32(sd, &bp->InIndex)) + if (bp->InIndex >= numInStreams || streamUsed[bp->InIndex]) + return SZ_ERROR_ARCHIVE; + streamUsed[bp->InIndex] = True; + + RINOK(SzReadNumber32(sd, &bp->OutIndex)) + if (bp->OutIndex >= numCoders || coderUsed[bp->OutIndex]) + return SZ_ERROR_ARCHIVE; + coderUsed[bp->OutIndex] = True; + } + + for (i = 0; i < numCoders; i++) + if (!coderUsed[i]) + { + f->UnpackStream = i; + break; + } + + if (i == numCoders) + return SZ_ERROR_ARCHIVE; + } + + if (numPackStreams == 1) + { + for (i = 0; i < numInStreams; i++) + if (!streamUsed[i]) + break; + if (i == numInStreams) + return SZ_ERROR_ARCHIVE; + f->PackStreams[0] = i; + } + else + for (i = 0; i < numPackStreams; i++) + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + f->PackStreams[i] = index; + } + } + + f->NumCoders = numCoders; + + return SZ_OK; +} + + +static Z7_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num) +{ + CSzData sd; + sd = *sd2; + for (; num != 0; num--) + { + Byte firstByte, mask; + unsigned i; + SZ_READ_BYTE_2(firstByte) + if ((firstByte & 0x80) == 0) + continue; + if ((firstByte & 0x40) == 0) + { + if (sd.Size == 0) + return SZ_ERROR_ARCHIVE; + sd.Size--; + sd.Data++; + continue; + } + mask = 0x20; + for (i = 2; i < 8 && (firstByte & mask) != 0; i++) + mask >>= 1; + if (i > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, i) + } + *sd2 = sd; + return SZ_OK; +} + + +#define k_Scan_NumCoders_MAX 64 +#define k_Scan_NumCodersStreams_in_Folder_MAX 64 + + +static SRes ReadUnpackInfo(CSzAr *p, + CSzData *sd2, + UInt32 numFoldersMax, + const CBuf *tempBufs, UInt32 numTempBufs, + ISzAllocPtr alloc) +{ + CSzData sd; + + UInt32 fo, numFolders, numCodersOutStreams, packStreamIndex; + const Byte *startBufPtr; + Byte external; + + RINOK(WaitId(sd2, k7zIdFolder)) + + RINOK(SzReadNumber32(sd2, &numFolders)) + if (numFolders > numFoldersMax) + return SZ_ERROR_UNSUPPORTED; + p->NumFolders = numFolders; + + SZ_READ_BYTE_SD(sd2, external) + if (external == 0) + sd = *sd2; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd2, &index)) + if (index >= numTempBufs) + return SZ_ERROR_ARCHIVE; + sd.Data = tempBufs[index].data; + sd.Size = tempBufs[index].size; + } + + MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc) + MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc) + + startBufPtr = sd.Data; + + packStreamIndex = 0; + numCodersOutStreams = 0; + + for (fo = 0; fo < numFolders; fo++) + { + UInt32 numCoders, ci, numInStreams = 0; + + p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr); + + RINOK(SzReadNumber32(&sd, &numCoders)) + if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (ci = 0; ci < numCoders; ci++) + { + Byte mainByte; + unsigned idSize; + UInt32 coderInStreams; + + SZ_READ_BYTE_2(mainByte) + if ((mainByte & 0xC0) != 0) + return SZ_ERROR_UNSUPPORTED; + idSize = (mainByte & 0xF); + if (idSize > 8) + return SZ_ERROR_UNSUPPORTED; + if (idSize > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, idSize) + + coderInStreams = 1; + + if ((mainByte & 0x10) != 0) + { + UInt32 coderOutStreams; + RINOK(SzReadNumber32(&sd, &coderInStreams)) + 
RINOK(SzReadNumber32(&sd, &coderOutStreams)) + if (coderInStreams > k_Scan_NumCodersStreams_in_Folder_MAX || coderOutStreams != 1) + return SZ_ERROR_UNSUPPORTED; + } + + numInStreams += coderInStreams; + + if ((mainByte & 0x20) != 0) + { + UInt32 propsSize; + RINOK(SzReadNumber32(&sd, &propsSize)) + if (propsSize > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, propsSize) + } + } + + { + UInt32 indexOfMainStream = 0; + UInt32 numPackStreams = 1; + + if (numCoders != 1 || numInStreams != 1) + { + Byte streamUsed[k_Scan_NumCodersStreams_in_Folder_MAX]; + Byte coderUsed[k_Scan_NumCoders_MAX]; + + UInt32 i; + const UInt32 numBonds = numCoders - 1; + if (numInStreams < numBonds) + return SZ_ERROR_ARCHIVE; + + if (numInStreams > k_Scan_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (i = 0; i < numInStreams; i++) + streamUsed[i] = False; + for (i = 0; i < numCoders; i++) + coderUsed[i] = False; + + for (i = 0; i < numBonds; i++) + { + UInt32 index; + + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numCoders || coderUsed[index]) + return SZ_ERROR_ARCHIVE; + coderUsed[index] = True; + } + + numPackStreams = numInStreams - numBonds; + + if (numPackStreams != 1) + for (i = 0; i < numPackStreams; i++) + { + UInt32 index; + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + } + + for (i = 0; i < numCoders; i++) + if (!coderUsed[i]) + { + indexOfMainStream = i; + break; + } + + if (i == numCoders) + return SZ_ERROR_ARCHIVE; + } + + p->FoStartPackStreamIndex[fo] = packStreamIndex; + p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; + p->FoToMainUnpackSizeIndex[fo] = (Byte)indexOfMainStream; + numCodersOutStreams += numCoders; + if (numCodersOutStreams < numCoders) + return SZ_ERROR_UNSUPPORTED; + if (numPackStreams > p->NumPackStreams - packStreamIndex) + return SZ_ERROR_ARCHIVE; + packStreamIndex += numPackStreams; + } + } + + p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; + + { + const size_t dataSize = (size_t)(sd.Data - startBufPtr); + p->FoStartPackStreamIndex[fo] = packStreamIndex; + p->FoCodersOffsets[fo] = dataSize; + MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc) + } + + if (external != 0) + { + if (sd.Size != 0) + return SZ_ERROR_ARCHIVE; + sd = *sd2; + } + + RINOK(WaitId(&sd, k7zIdCodersUnpackSize)) + + MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc) + { + UInt32 i; + for (i = 0; i < numCodersOutStreams; i++) + { + RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i)) + } + } + + for (;;) + { + UInt64 type; + RINOK(ReadID(&sd, &type)) + if (type == k7zIdEnd) + { + *sd2 = sd; + return SZ_OK; + } + if (type == k7zIdCRC) + { + RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc)) + continue; + } + RINOK(SkipData(&sd)) + } +} + + +UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex) +{ + return p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex] + p->FoToMainUnpackSizeIndex[folderIndex]]; +} + + +typedef struct +{ + UInt32 NumTotalSubStreams; + UInt32 NumSubDigests; + CSzData sdNumSubStreams; + CSzData sdSizes; + CSzData sdCRCs; +} CSubStreamInfo; + + +static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) +{ + UInt64 type = 0; + UInt32 numSubDigests = 0; + const UInt32 numFolders = p->NumFolders; + UInt32 numUnpackStreams = numFolders; + 
UInt32 numUnpackSizesInData = 0; + + for (;;) + { + RINOK(ReadID(sd, &type)) + if (type == k7zIdNumUnpackStream) + { + UInt32 i; + ssi->sdNumSubStreams.Data = sd->Data; + numUnpackStreams = 0; + numSubDigests = 0; + for (i = 0; i < numFolders; i++) + { + UInt32 numStreams; + RINOK(SzReadNumber32(sd, &numStreams)) + if (numUnpackStreams > numUnpackStreams + numStreams) + return SZ_ERROR_UNSUPPORTED; + numUnpackStreams += numStreams; + if (numStreams != 0) + numUnpackSizesInData += (numStreams - 1); + if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i)) + numSubDigests += numStreams; + } + ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data); + continue; + } + if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + + if (!ssi->sdNumSubStreams.Data) + { + numSubDigests = numFolders; + if (p->FolderCRCs.Defs) + numSubDigests = numFolders - CountDefinedBits(p->FolderCRCs.Defs, numFolders); + } + + ssi->NumTotalSubStreams = numUnpackStreams; + ssi->NumSubDigests = numSubDigests; + + if (type == k7zIdSize) + { + ssi->sdSizes.Data = sd->Data; + RINOK(SkipNumbers(sd, numUnpackSizesInData)) + ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data); + RINOK(ReadID(sd, &type)) + } + + for (;;) + { + if (type == k7zIdEnd) + return SZ_OK; + if (type == k7zIdCRC) + { + ssi->sdCRCs.Data = sd->Data; + RINOK(SkipBitUi32s(sd, numSubDigests)) + ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data); + } + else + { + RINOK(SkipData(sd)) + } + RINOK(ReadID(sd, &type)) + } +} + +static SRes SzReadStreamsInfo(CSzAr *p, + CSzData *sd, + UInt32 numFoldersMax, const CBuf *tempBufs, UInt32 numTempBufs, + UInt64 *dataOffset, + CSubStreamInfo *ssi, + ISzAllocPtr alloc) +{ + UInt64 type; + + SzData_CLEAR(&ssi->sdSizes) + SzData_CLEAR(&ssi->sdCRCs) + SzData_CLEAR(&ssi->sdNumSubStreams) + + *dataOffset = 0; + RINOK(ReadID(sd, &type)) + if (type == k7zIdPackInfo) + { + RINOK(ReadNumber(sd, dataOffset)) + if (*dataOffset > p->RangeLimit) + return SZ_ERROR_ARCHIVE; + RINOK(ReadPackInfo(p, sd, alloc)) + if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset) + return SZ_ERROR_ARCHIVE; + RINOK(ReadID(sd, &type)) + } + if (type == k7zIdUnpackInfo) + { + RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc)) + RINOK(ReadID(sd, &type)) + } + if (type == k7zIdSubStreamsInfo) + { + RINOK(ReadSubStreamsInfo(p, sd, ssi)) + RINOK(ReadID(sd, &type)) + } + else + { + ssi->NumTotalSubStreams = p->NumFolders; + // ssi->NumSubDigests = 0; + } + + return (type == k7zIdEnd ? 
SZ_OK : SZ_ERROR_UNSUPPORTED); +} + +static SRes SzReadAndDecodePackedStreams( + ILookInStreamPtr inStream, + CSzData *sd, + CBuf *tempBufs, + UInt32 numFoldersMax, + UInt64 baseOffset, + CSzAr *p, + ISzAllocPtr allocTemp) +{ + UInt64 dataStartPos; + UInt32 fo; + CSubStreamInfo ssi; + + RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp)) + + dataStartPos += baseOffset; + if (p->NumFolders == 0) + return SZ_ERROR_ARCHIVE; + + for (fo = 0; fo < p->NumFolders; fo++) + Buf_Init(tempBufs + fo); + + for (fo = 0; fo < p->NumFolders; fo++) + { + CBuf *tempBuf = tempBufs + fo; + const UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo); + if ((size_t)unpackSize != unpackSize) + return SZ_ERROR_MEM; + if (!Buf_Create(tempBuf, (size_t)unpackSize, allocTemp)) + return SZ_ERROR_MEM; + } + + for (fo = 0; fo < p->NumFolders; fo++) + { + const CBuf *tempBuf = tempBufs + fo; + RINOK(LookInStream_SeekTo(inStream, dataStartPos)) + RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp)) + } + + return SZ_OK; +} + +static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size_t *offsets) +{ + size_t pos = 0; + *offsets++ = 0; + if (numFiles == 0) + return (size == 0) ? SZ_OK : SZ_ERROR_ARCHIVE; + if (size < 2) + return SZ_ERROR_ARCHIVE; + if (data[size - 2] != 0 || data[size - 1] != 0) + return SZ_ERROR_ARCHIVE; + do + { + const Byte *p; + if (pos == size) + return SZ_ERROR_ARCHIVE; + for (p = data + pos; + #ifdef _WIN32 + *(const UInt16 *)(const void *)p != 0 + #else + p[0] != 0 || p[1] != 0 + #endif + ; p += 2); + pos = (size_t)(p - data) + 2; + *offsets++ = (pos >> 1); + } + while (--numFiles); + return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; +} + +static Z7_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, + CSzData *sd2, + const CBuf *tempBufs, UInt32 numTempBufs, + ISzAllocPtr alloc) +{ + CSzData sd; + UInt32 i; + CNtfsFileTime *vals; + Byte *defs; + Byte external; + + RINOK(ReadBitVector(sd2, num, &p->Defs, alloc)) + + SZ_READ_BYTE_SD(sd2, external) + if (external == 0) + sd = *sd2; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd2, &index)) + if (index >= numTempBufs) + return SZ_ERROR_ARCHIVE; + sd.Data = tempBufs[index].data; + sd.Size = tempBufs[index].size; + } + + MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc) + vals = p->Vals; + defs = p->Defs; + for (i = 0; i < num; i++) + if (SzBitArray_Check(defs, i)) + { + if (sd.Size < 8) + return SZ_ERROR_ARCHIVE; + vals[i].Low = GetUi32(sd.Data); + vals[i].High = GetUi32(sd.Data + 4); + SKIP_DATA2(sd, 8) + } + else + vals[i].High = vals[i].Low = 0; + + if (external == 0) + *sd2 = sd; + + return SZ_OK; +} + + +#define NUM_ADDITIONAL_STREAMS_MAX 8 + + +static SRes SzReadHeader2( + CSzArEx *p, /* allocMain */ + CSzData *sd, + ILookInStreamPtr inStream, + CBuf *tempBufs, UInt32 *numTempBufs, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp + ) +{ + CSubStreamInfo ssi; + +{ + UInt64 type; + + SzData_CLEAR(&ssi.sdSizes) + SzData_CLEAR(&ssi.sdCRCs) + SzData_CLEAR(&ssi.sdNumSubStreams) + + ssi.NumSubDigests = 0; + ssi.NumTotalSubStreams = 0; + + RINOK(ReadID(sd, &type)) + + if (type == k7zIdArchiveProperties) + { + for (;;) + { + UInt64 type2; + RINOK(ReadID(sd, &type2)) + if (type2 == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdAdditionalStreamsInfo) + { + CSzAr tempAr; + SRes res; + + SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + + res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, 
NUM_ADDITIONAL_STREAMS_MAX, + p->startPosAfterHeader, &tempAr, allocTemp); + *numTempBufs = tempAr.NumFolders; + SzAr_Free(&tempAr, allocTemp); + + if (res != SZ_OK) + return res; + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdMainStreamsInfo) + { + RINOK(SzReadStreamsInfo(&p->db, sd, (UInt32)1 << 30, tempBufs, *numTempBufs, + &p->dataPos, &ssi, allocMain)) + p->dataPos += p->startPosAfterHeader; + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdEnd) + { + return SZ_OK; + } + + if (type != k7zIdFilesInfo) + return SZ_ERROR_ARCHIVE; +} + +{ + UInt32 numFiles = 0; + UInt32 numEmptyStreams = 0; + const Byte *emptyStreams = NULL; + const Byte *emptyFiles = NULL; + + RINOK(SzReadNumber32(sd, &numFiles)) + p->NumFiles = numFiles; + + for (;;) + { + UInt64 type; + UInt64 size; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + break; + RINOK(ReadNumber(sd, &size)) + if (size > sd->Size) + return SZ_ERROR_ARCHIVE; + + if (type >= ((UInt32)1 << 8)) + { + SKIP_DATA(sd, size) + } + else switch ((unsigned)type) + { + case k7zIdName: + { + size_t namesSize; + const Byte *namesData; + Byte external; + + SZ_READ_BYTE(external) + if (external == 0) + { + namesSize = (size_t)size - 1; + namesData = sd->Data; + } + else + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= *numTempBufs) + return SZ_ERROR_ARCHIVE; + namesData = (tempBufs)[index].data; + namesSize = (tempBufs)[index].size; + } + + if ((namesSize & 1) != 0) + return SZ_ERROR_ARCHIVE; + MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain) + MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain) + RINOK(SzReadFileNames(p->FileNames, namesSize, numFiles, p->FileNameOffsets)) + if (external == 0) + { + SKIP_DATA(sd, namesSize) + } + break; + } + case k7zIdEmptyStream: + { + RINOK(RememberBitVector(sd, numFiles, &emptyStreams)) + numEmptyStreams = CountDefinedBits(emptyStreams, numFiles); + emptyFiles = NULL; + break; + } + case k7zIdEmptyFile: + { + RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles)) + break; + } + case k7zIdWinAttrib: + { + Byte external; + CSzData sdSwitch; + CSzData *sdPtr; + SzBitUi32s_Free(&p->Attribs, allocMain); + RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain)) + + SZ_READ_BYTE(external) + if (external == 0) + sdPtr = sd; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= *numTempBufs) + return SZ_ERROR_ARCHIVE; + sdSwitch.Data = (tempBufs)[index].data; + sdSwitch.Size = (tempBufs)[index].size; + sdPtr = &sdSwitch; + } + RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain)) + break; + } + /* + case k7zParent: + { + SzBitUi32s_Free(&p->Parents, allocMain); + RINOK(ReadBitVector(sd, numFiles, &p->Parents.Defs, allocMain)); + RINOK(SzReadSwitch(sd)); + RINOK(ReadUi32s(sd, numFiles, &p->Parents, allocMain)); + break; + } + */ + case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; + case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; + default: + { + SKIP_DATA(sd, size) + } + } + } + + if (numFiles - numEmptyStreams != ssi.NumTotalSubStreams) + return SZ_ERROR_ARCHIVE; + + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + + { + UInt32 i; + UInt32 emptyFileIndex = 0; + UInt32 folderIndex = 0; + UInt32 remSubStreams = 0; + UInt32 numSubStreams = 0; + UInt64 unpackPos = 0; + const Byte *digestsDefs = NULL; + const Byte *digestsVals = NULL; + UInt32 digestIndex = 0; + Byte 
isDirMask = 0; + Byte crcMask = 0; + Byte mask = 0x80; + + MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain) + MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain) + MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain) + MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain) + + RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain)) + + if (ssi.sdCRCs.Size != 0) + { + Byte allDigestsDefined = 0; + SZ_READ_BYTE_SD_NOCHECK(&ssi.sdCRCs, allDigestsDefined) + if (allDigestsDefined) + digestsVals = ssi.sdCRCs.Data; + else + { + const size_t numBytes = (ssi.NumSubDigests + 7) >> 3; + digestsDefs = ssi.sdCRCs.Data; + digestsVals = digestsDefs + numBytes; + } + } + + for (i = 0; i < numFiles; i++, mask >>= 1) + { + if (mask == 0) + { + const UInt32 byteIndex = (i - 1) >> 3; + p->IsDirs[byteIndex] = isDirMask; + p->CRCs.Defs[byteIndex] = crcMask; + isDirMask = 0; + crcMask = 0; + mask = 0x80; + } + + p->UnpackPositions[i] = unpackPos; + p->CRCs.Vals[i] = 0; + + if (emptyStreams && SzBitArray_Check(emptyStreams, i)) + { + if (emptyFiles) + { + if (!SzBitArray_Check(emptyFiles, emptyFileIndex)) + isDirMask |= mask; + emptyFileIndex++; + } + else + isDirMask |= mask; + if (remSubStreams == 0) + { + p->FileToFolder[i] = (UInt32)-1; + continue; + } + } + + if (remSubStreams == 0) + { + for (;;) + { + if (folderIndex >= p->db.NumFolders) + return SZ_ERROR_ARCHIVE; + p->FolderToFile[folderIndex] = i; + numSubStreams = 1; + if (ssi.sdNumSubStreams.Data) + { + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) + } + remSubStreams = numSubStreams; + if (numSubStreams != 0) + break; + { + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + unpackPos += folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + } + folderIndex++; + } + } + + p->FileToFolder[i] = folderIndex; + + if (emptyStreams && SzBitArray_Check(emptyStreams, i)) + continue; + + if (--remSubStreams == 0) + { + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + const UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]]; + if (folderUnpackSize < unpackPos - startFolderUnpackPos) + return SZ_ERROR_ARCHIVE; + unpackPos = startFolderUnpackPos + folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + + if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, folderIndex)) + { + p->CRCs.Vals[i] = p->db.FolderCRCs.Vals[folderIndex]; + crcMask |= mask; + } + folderIndex++; + } + else + { + UInt64 v; + RINOK(ReadNumber(&ssi.sdSizes, &v)) + unpackPos += v; + if (unpackPos < v) + return SZ_ERROR_ARCHIVE; + } + if ((crcMask & mask) == 0 && digestsVals) + { + if (!digestsDefs || SzBitArray_Check(digestsDefs, digestIndex)) + { + p->CRCs.Vals[i] = GetUi32(digestsVals); + digestsVals += 4; + crcMask |= mask; + } + digestIndex++; + } + } + + if (mask != 0x80) + { + const UInt32 byteIndex = (i - 1) >> 3; + p->IsDirs[byteIndex] = isDirMask; + p->CRCs.Defs[byteIndex] = crcMask; + } + + p->UnpackPositions[i] = unpackPos; + + if (remSubStreams != 0) + return SZ_ERROR_ARCHIVE; + + for (;;) + { + p->FolderToFile[folderIndex] = i; + if (folderIndex >= p->db.NumFolders) + break; + if (!ssi.sdNumSubStreams.Data) + return SZ_ERROR_ARCHIVE; + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) + if (numSubStreams != 0) + return SZ_ERROR_ARCHIVE; + /* + { + UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + unpackPos += folderUnpackSize; + if 
(unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + } + */ + folderIndex++; + } + + if (ssi.sdNumSubStreams.Data && ssi.sdNumSubStreams.Size != 0) + return SZ_ERROR_ARCHIVE; + } +} + return SZ_OK; +} + + +static SRes SzReadHeader( + CSzArEx *p, + CSzData *sd, + ILookInStreamPtr inStream, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + UInt32 i; + UInt32 numTempBufs = 0; + SRes res; + CBuf tempBufs[NUM_ADDITIONAL_STREAMS_MAX]; + + for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++) + Buf_Init(tempBufs + i); + + res = SzReadHeader2(p, sd, inStream, + tempBufs, &numTempBufs, + allocMain, allocTemp); + + for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++) + Buf_Free(tempBufs + i, allocTemp); + + RINOK(res) + + if (sd->Size != 0) + return SZ_ERROR_FAIL; + + return res; +} + +static SRes SzArEx_Open2( + CSzArEx *p, + ILookInStreamPtr inStream, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + Byte header[k7zStartHeaderSize]; + Int64 startArcPos; + UInt64 nextHeaderOffset, nextHeaderSize; + size_t nextHeaderSizeT; + UInt32 nextHeaderCRC; + CBuf buf; + SRes res; + + startArcPos = 0; + RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR)) + + RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE)) + + if (!TestSignatureCandidate(header)) + return SZ_ERROR_NO_ARCHIVE; + if (header[6] != k7zMajorVersion) + return SZ_ERROR_UNSUPPORTED; + + nextHeaderOffset = GetUi64(header + 12); + nextHeaderSize = GetUi64(header + 20); + nextHeaderCRC = GetUi32(header + 28); + + p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize; + + if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) + return SZ_ERROR_CRC; + + p->db.RangeLimit = nextHeaderOffset; + + nextHeaderSizeT = (size_t)nextHeaderSize; + if (nextHeaderSizeT != nextHeaderSize) + return SZ_ERROR_MEM; + if (nextHeaderSizeT == 0) + return SZ_OK; + if (nextHeaderOffset > nextHeaderOffset + nextHeaderSize || + nextHeaderOffset > nextHeaderOffset + nextHeaderSize + k7zStartHeaderSize) + return SZ_ERROR_NO_ARCHIVE; + + { + Int64 pos = 0; + RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)) + if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) + return SZ_ERROR_INPUT_EOF; + } + + RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)) + + if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) + return SZ_ERROR_MEM; + + res = LookInStream_Read(inStream, buf.data, nextHeaderSizeT); + + if (res == SZ_OK) + { + res = SZ_ERROR_ARCHIVE; + if (CrcCalc(buf.data, nextHeaderSizeT) == nextHeaderCRC) + { + CSzData sd; + UInt64 type; + sd.Data = buf.data; + sd.Size = buf.size; + + res = ReadID(&sd, &type); + + if (res == SZ_OK && type == k7zIdEncodedHeader) + { + CSzAr tempAr; + CBuf tempBuf; + Buf_Init(&tempBuf); + + SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + + res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp); + SzAr_Free(&tempAr, allocTemp); + + if (res != SZ_OK) + { + Buf_Free(&tempBuf, allocTemp); + } + else + { + Buf_Free(&buf, allocTemp); + buf.data = tempBuf.data; + buf.size = tempBuf.size; + sd.Data = buf.data; + sd.Size = buf.size; + res = ReadID(&sd, &type); + } + } + + if (res == SZ_OK) + { + if (type == k7zIdHeader) + { + /* + CSzData sd2; + unsigned ttt; + for (ttt = 0; ttt < 40000; ttt++) + { + 
SzArEx_Free(p, allocMain); + sd2 = sd; + res = SzReadHeader(p, &sd2, inStream, allocMain, allocTemp); + if (res != SZ_OK) + break; + } + */ + res = SzReadHeader(p, &sd, inStream, allocMain, allocTemp); + } + else + res = SZ_ERROR_UNSUPPORTED; + } + } + } + + Buf_Free(&buf, allocTemp); + return res; +} + + +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, + ISzAllocPtr allocMain, ISzAllocPtr allocTemp) +{ + const SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp); + if (res != SZ_OK) + SzArEx_Free(p, allocMain); + return res; +} + + +SRes SzArEx_Extract( + const CSzArEx *p, + ILookInStreamPtr inStream, + UInt32 fileIndex, + UInt32 *blockIndex, + Byte **tempBuf, + size_t *outBufferSize, + size_t *offset, + size_t *outSizeProcessed, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + const UInt32 folderIndex = p->FileToFolder[fileIndex]; + SRes res = SZ_OK; + + *offset = 0; + *outSizeProcessed = 0; + + if (folderIndex == (UInt32)-1) + { + ISzAlloc_Free(allocMain, *tempBuf); + *blockIndex = folderIndex; + *tempBuf = NULL; + *outBufferSize = 0; + return SZ_OK; + } + + if (*tempBuf == NULL || *blockIndex != folderIndex) + { + const UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + /* + UInt64 unpackSizeSpec = + p->UnpackPositions[p->FolderToFile[(size_t)folderIndex + 1]] - + p->UnpackPositions[p->FolderToFile[folderIndex]]; + */ + const size_t unpackSize = (size_t)unpackSizeSpec; + + if (unpackSize != unpackSizeSpec) + return SZ_ERROR_MEM; + *blockIndex = folderIndex; + ISzAlloc_Free(allocMain, *tempBuf); + *tempBuf = NULL; + + if (res == SZ_OK) + { + *outBufferSize = unpackSize; + if (unpackSize != 0) + { + *tempBuf = (Byte *)ISzAlloc_Alloc(allocMain, unpackSize); + if (*tempBuf == NULL) + res = SZ_ERROR_MEM; + } + + if (res == SZ_OK) + { + res = SzAr_DecodeFolder(&p->db, folderIndex, + inStream, p->dataPos, *tempBuf, unpackSize, allocTemp); + } + } + } + + if (res == SZ_OK) + { + const UInt64 unpackPos = p->UnpackPositions[fileIndex]; + *offset = (size_t)(unpackPos - p->UnpackPositions[p->FolderToFile[folderIndex]]); + *outSizeProcessed = (size_t)(p->UnpackPositions[(size_t)fileIndex + 1] - unpackPos); + if (*offset + *outSizeProcessed > *outBufferSize) + return SZ_ERROR_FAIL; + if (SzBitWithVals_Check(&p->CRCs, fileIndex)) + if (CrcCalc(*tempBuf + *offset, *outSizeProcessed) != p->CRCs.Vals[fileIndex]) + res = SZ_ERROR_CRC; + } + + return res; +} + + +size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest) +{ + const size_t offs = p->FileNameOffsets[fileIndex]; + const size_t len = p->FileNameOffsets[fileIndex + 1] - offs; + if (dest != 0) + { + size_t i; + const Byte *src = p->FileNames + offs * 2; + for (i = 0; i < len; i++) + dest[i] = GetUi16(src + i * 2); + } + return len; +} + +/* +size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex) +{ + size_t len; + if (!p->FileNameOffsets) + return 1; + len = 0; + for (;;) + { + UInt32 parent = (UInt32)(Int32)-1; + len += p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex]; + if SzBitWithVals_Check(&p->Parents, fileIndex) + parent = p->Parents.Vals[fileIndex]; + if (parent == (UInt32)(Int32)-1) + return len; + fileIndex = parent; + } +} + +UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest) +{ + BoolInt needSlash; + if (!p->FileNameOffsets) + { + *(--dest) = 0; + return dest; + } + needSlash = False; + for (;;) + { + UInt32 parent = (UInt32)(Int32)-1; + size_t curLen = p->FileNameOffsets[fileIndex + 1] - 
p->FileNameOffsets[fileIndex]; + SzArEx_GetFileNameUtf16(p, fileIndex, dest - curLen); + if (needSlash) + *(dest - 1) = '/'; + needSlash = True; + dest -= curLen; + + if SzBitWithVals_Check(&p->Parents, fileIndex) + parent = p->Parents.Vals[fileIndex]; + if (parent == (UInt32)(Int32)-1) + return dest; + fileIndex = parent; + } +} +*/ diff --git a/src/core/fex/7z_C/7zBuf.c b/src/core/fex/7z_C/7zBuf.c index e85b1318..438bba68 100644 --- a/src/core/fex/7z_C/7zBuf.c +++ b/src/core/fex/7z_C/7zBuf.c @@ -1,34 +1,36 @@ -/* 7zBuf.c -- Byte Buffer -2008-03-28 -Igor Pavlov -Public domain */ - -#include "7zBuf.h" - -void Buf_Init(CBuf* p) -{ - p->data = 0; - p->size = 0; -} - -int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc) -{ - p->size = 0; - if (size == 0) { - p->data = 0; - return 1; - } - p->data = (Byte*)alloc->Alloc(alloc, size); - if (p->data != 0) { - p->size = size; - return 1; - } - return 0; -} - -void Buf_Free(CBuf* p, ISzAlloc* alloc) -{ - alloc->Free(alloc, p->data); - p->data = 0; - p->size = 0; -} +/* 7zBuf.c -- Byte Buffer +2017-04-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "7zBuf.h" + +void Buf_Init(CBuf *p) +{ + p->data = 0; + p->size = 0; +} + +int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc) +{ + p->size = 0; + if (size == 0) + { + p->data = 0; + return 1; + } + p->data = (Byte *)ISzAlloc_Alloc(alloc, size); + if (p->data) + { + p->size = size; + return 1; + } + return 0; +} + +void Buf_Free(CBuf *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->data); + p->data = 0; + p->size = 0; +} diff --git a/src/core/fex/7z_C/7zBuf.h b/src/core/fex/7z_C/7zBuf.h index bc82eb90..ca34c190 100644 --- a/src/core/fex/7z_C/7zBuf.h +++ b/src/core/fex/7z_C/7zBuf.h @@ -1,39 +1,35 @@ -/* 7zBuf.h -- Byte Buffer -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __7Z_BUF_H -#define __7Z_BUF_H - -#include "Types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ - Byte* data; - size_t size; -} CBuf; - -void Buf_Init(CBuf* p); -int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc); -void Buf_Free(CBuf* p, ISzAlloc* alloc); - -typedef struct -{ - Byte* data; - size_t size; - size_t pos; -} CDynBuf; - -void DynBuf_Construct(CDynBuf* p); -void DynBuf_SeekToBeg(CDynBuf* p); -int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc); -void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc); - -#ifdef __cplusplus -} -#endif - -#endif +/* 7zBuf.h -- Byte Buffer +2023-03-04 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_BUF_H +#define ZIP7_INC_7Z_BUF_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef struct +{ + Byte *data; + size_t size; +} CBuf; + +void Buf_Init(CBuf *p); +int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc); +void Buf_Free(CBuf *p, ISzAllocPtr alloc); + +typedef struct +{ + Byte *data; + size_t size; + size_t pos; +} CDynBuf; + +void DynBuf_Construct(CDynBuf *p); +void DynBuf_SeekToBeg(CDynBuf *p); +int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc); +void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/7zBuf2.c b/src/core/fex/7z_C/7zBuf2.c new file mode 100644 index 00000000..49b4343b --- /dev/null +++ b/src/core/fex/7z_C/7zBuf2.c @@ -0,0 +1,52 @@ +/* 7zBuf2.c -- Byte Buffer +2017-04-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zBuf.h" + +void DynBuf_Construct(CDynBuf *p) +{ + p->data = 0; + p->size = 0; + p->pos = 0; +} + +void DynBuf_SeekToBeg(CDynBuf *p) +{ + p->pos = 0; +} + +int 
DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc) +{ + if (size > p->size - p->pos) + { + size_t newSize = p->pos + size; + Byte *data; + newSize += newSize / 4; + data = (Byte *)ISzAlloc_Alloc(alloc, newSize); + if (!data) + return 0; + p->size = newSize; + if (p->pos != 0) + memcpy(data, p->data, p->pos); + ISzAlloc_Free(alloc, p->data); + p->data = data; + } + if (size != 0) + { + memcpy(p->data + p->pos, buf, size); + p->pos += size; + } + return 1; +} + +void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->data); + p->data = 0; + p->size = 0; + p->pos = 0; +} diff --git a/src/core/fex/7z_C/7zCrc.c b/src/core/fex/7z_C/7zCrc.c index 3a3aec95..9be9a75a 100644 --- a/src/core/fex/7z_C/7zCrc.c +++ b/src/core/fex/7z_C/7zCrc.c @@ -1,80 +1,420 @@ -/* 7zCrc.c -- CRC32 init -2010-12-01 : Igor Pavlov : Public domain */ - -#include "7zCrc.h" -#include "CpuArch.h" - -#define kCrcPoly 0xEDB88320 - -#ifdef MY_CPU_X86_OR_AMD64 -#define CRC_NUM_TABLES 8 -UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void* data, size_t size, const UInt32* table); -#elif defined(MY_CPU_LE) -#define CRC_NUM_TABLES 4 -#else -#define CRC_NUM_TABLES 5 -#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24)) -UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void* data, size_t size, const UInt32* table); -#endif - -#ifndef MY_CPU_BE -UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void* data, size_t size, const UInt32* table); -#endif - -typedef UInt32(MY_FAST_CALL* CRC_FUNC)(UInt32 v, const void* data, size_t size, const UInt32* table); - -static CRC_FUNC g_CrcUpdate; -UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; - -UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void* data, size_t size) -{ - return g_CrcUpdate(v, data, size, g_CrcTable); -} - -UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size) -{ - return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL; -} - -void MY_FAST_CALL CrcGenerateTable() -{ - UInt32 i; - for (i = 0; i < 256; i++) { - UInt32 r = i; - unsigned j; - for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); - g_CrcTable[i] = r; - } - for (; i < 256 * CRC_NUM_TABLES; i++) { - UInt32 r = g_CrcTable[i - 256]; - g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8); - } - -#ifdef MY_CPU_LE - - g_CrcUpdate = CrcUpdateT4; - -#if CRC_NUM_TABLES == 8 - if (!CPU_Is_InOrder()) - g_CrcUpdate = CrcUpdateT8; -#endif - -#else - { -#ifndef MY_CPU_BE - UInt32 k = 1; - if (*(const Byte*)&k == 1) - g_CrcUpdate = CrcUpdateT4; - else -#endif - { - for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--) { - UInt32 x = g_CrcTable[i - 256]; - g_CrcTable[i] = CRC_UINT32_SWAP(x); - } - g_CrcUpdate = CrcUpdateT1_BeT4; - } - } -#endif -} +/* 7zCrc.c -- CRC32 calculation and init +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "7zCrc.h" +#include "CpuArch.h" + +// for debug: +// #define __ARM_FEATURE_CRC32 1 + +#ifdef __ARM_FEATURE_CRC32 +// #pragma message("__ARM_FEATURE_CRC32") +#define Z7_CRC_HW_FORCE +#endif + +// #define Z7_CRC_DEBUG_BE +#ifdef Z7_CRC_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +#ifdef Z7_CRC_HW_FORCE + #define Z7_CRC_NUM_TABLES_USE 1 +#else +#ifdef Z7_CRC_NUM_TABLES + #define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES +#else + #define Z7_CRC_NUM_TABLES_USE 12 +#endif +#endif + +#if Z7_CRC_NUM_TABLES_USE < 1 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif + +#if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1) + #define Z7_CRC_NUM_TABLES_TOTAL 
Z7_CRC_NUM_TABLES_USE +#else + #define Z7_CRC_NUM_TABLES_TOTAL (Z7_CRC_NUM_TABLES_USE + 1) +#endif + +#ifndef Z7_CRC_HW_FORCE + +#if Z7_CRC_NUM_TABLES_USE == 1 \ + || (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) +#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +#define Z7_CRC_UPDATE_T1_FUNC_NAME CrcUpdateGT1 +static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size) +{ + const UInt32 *table = g_CrcTable; + const Byte *p = (const Byte *)data; + const Byte *lim = p + size; + for (; p != lim; p++) + v = CRC_UPDATE_BYTE_2(v, *p); + return v; +} +#endif + + +#if Z7_CRC_NUM_TABLES_USE != 1 +#ifndef MY_CPU_BE + #define FUNC_NAME_LE_2(s) CrcUpdateT ## s + #define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s) + #define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE) + UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table); +#endif +#ifndef MY_CPU_LE + #define FUNC_NAME_BE_2(s) CrcUpdateT1_BeT ## s + #define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s) + #define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE) + UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table); +#endif +#endif + +#endif // Z7_CRC_HW_FORCE + +/* ---------- hardware CRC ---------- */ + +#ifdef MY_CPU_LE + +#if defined(MY_CPU_ARM_OR_ARM64) +// #pragma message("ARM*") + + #if (defined(__clang__) && (__clang_major__ >= 3)) \ + || defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #if !defined(__ARM_FEATURE_CRC32) +// #pragma message("!defined(__ARM_FEATURE_CRC32)") +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define __ARM_FEATURE_CRC32 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRC32_WAS_SET + #if defined(__clang__) + #if defined(MY_CPU_ARM64) + #define ATTRIB_CRC __attribute__((__target__("crc"))) + #else + #define ATTRIB_CRC __attribute__((__target__("armv8-a,crc"))) + #endif + #else + #if defined(MY_CPU_ARM64) +#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000) + #define ATTRIB_CRC __attribute__((__target__("+crc"))) +#endif + #else +#if !defined(Z7_GCC_VERSION) || (__GNUC__ >= 8) +#if defined(__ARM_FP) && __GNUC__ >= 8 +// for -mfloat-abi=hard: similar to + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd"))) +#else + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) +#endif +#endif + #endif + #endif + #endif + #if defined(__ARM_FEATURE_CRC32) + // #pragma message("") +/* +arm_acle.h (GGC): + before Nov 17, 2017: +#ifdef __ARM_FEATURE_CRC32 + + Nov 17, 2017: gcc10.0 (gcc 9.2.0) checked" +#if __ARM_ARCH >= 8 +#pragma GCC target ("arch=armv8-a+crc") + + Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1: +#ifdef __ARM_FEATURE_CRC32 +#ifdef __ARM_FP +#pragma GCC target ("arch=armv8-a+crc+simd") +#else +#pragma GCC target ("arch=armv8-a+crc") +#endif +*/ +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 +#if defined(Z7_GCC_VERSION) && (__GNUC__ == 8) && (Z7_GCC_VERSION < 80400) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 9) && (Z7_GCC_VERSION < 90201) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 10) && (Z7_GCC_VERSION < 100100) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") +#undef __ARM_ARCH +#define __ARM_ARCH 8 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif + #define Z7_CRC_HW_USE + #include + #endif + #elif defined(_MSC_VER) + #if defined(MY_CPU_ARM64) + #if (_MSC_VER >= 1910) + #ifdef __clang__ + 
// #define Z7_CRC_HW_USE + // #include + #else + #define Z7_CRC_HW_USE + #include + #endif + #endif + #endif + #endif + +#else // non-ARM* + +// #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code +#ifdef Z7_CRC_HW_USE +#include "7zCrcEmu.h" +#endif + +#endif // non-ARM* + + + +#if defined(Z7_CRC_HW_USE) + +// #pragma message("USE ARM HW CRC") + +#ifdef MY_CPU_64BIT + #define CRC_HW_WORD_TYPE UInt64 + #define CRC_HW_WORD_FUNC __crc32d +#else + #define CRC_HW_WORD_TYPE UInt32 + #define CRC_HW_WORD_FUNC __crc32w +#endif + +#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4) + +#ifdef ATTRIB_CRC + ATTRIB_CRC +#endif +Z7_NO_INLINE +#ifdef Z7_CRC_HW_FORCE + UInt32 Z7_FASTCALL CrcUpdate +#else + static UInt32 Z7_FASTCALL CrcUpdate_HW +#endif + (UInt32 v, const void *data, size_t size) +{ + const Byte *p = (const Byte *)data; + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + if (size >= CRC_HW_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= CRC_HW_UNROLL_BYTES - 1; + lim -= size; + do + { + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p)); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE))); + p += 2 * sizeof(CRC_HW_WORD_TYPE); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p)); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE))); + p += 2 * sizeof(CRC_HW_WORD_TYPE); + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +#ifdef Z7_ARM_FEATURE_CRC32_WAS_SET +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#undef __ARM_FEATURE_CRC32 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#undef Z7_ARM_FEATURE_CRC32_WAS_SET +#endif + +#endif // defined(Z7_CRC_HW_USE) +#endif // MY_CPU_LE + + + +#ifndef Z7_CRC_HW_FORCE + +#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) +/* +typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC) + (UInt32 v, const void *data, size_t size, const UInt32 *table); +Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate; +*/ +static unsigned g_Crc_Algo; +#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) +static unsigned g_Crc_Be; +#endif +#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) + + + +Z7_NO_INLINE +#ifdef Z7_CRC_HW_USE + static UInt32 Z7_FASTCALL CrcUpdate_Base +#else + UInt32 Z7_FASTCALL CrcUpdate +#endif + (UInt32 crc, const void *data, size_t size) +{ +#if Z7_CRC_NUM_TABLES_USE == 1 + return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size); +#else // Z7_CRC_NUM_TABLES_USE != 1 +#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME + if (g_Crc_Algo == 1) + return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size); +#endif + +#ifdef MY_CPU_LE + return FUNC_NAME_LE(crc, data, size, g_CrcTable); +#elif defined(MY_CPU_BE) + return FUNC_NAME_BE(crc, data, size, g_CrcTable); +#else + if (g_Crc_Be) + return FUNC_NAME_BE(crc, data, size, g_CrcTable); + else + return FUNC_NAME_LE(crc, data, size, g_CrcTable); +#endif +#endif // Z7_CRC_NUM_TABLES_USE != 1 +} + + +#ifdef Z7_CRC_HW_USE +Z7_NO_INLINE +UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size) +{ + if (g_Crc_Algo == 0) + return CrcUpdate_HW(crc, data, size); + return CrcUpdate_Base(crc, data, size); +} +#endif + +#endif // !defined(Z7_CRC_HW_FORCE) + + + +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size) +{ + return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL; +} + + +MY_ALIGN(64) +UInt32 
g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL]; + + +void Z7_FASTCALL CrcGenerateTable(void) +{ + UInt32 i; + for (i = 0; i < 256; i++) + { +#if defined(Z7_CRC_HW_FORCE) + g_CrcTable[i] = __crc32b(i, 0); +#else + #define kCrcPoly 0xEDB88320 + UInt32 r = i; + unsigned j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); + g_CrcTable[i] = r; +#endif + } + for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++) + { + const UInt32 r = g_CrcTable[(size_t)i - 256]; + g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8); + } + +#if !defined(Z7_CRC_HW_FORCE) && \ + (defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE)) + +#if Z7_CRC_NUM_TABLES_USE <= 1 + g_Crc_Algo = 1; +#else // Z7_CRC_NUM_TABLES_USE <= 1 + +#if defined(MY_CPU_LE) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; +#else // !defined(MY_CPU_LE) + { +#ifndef MY_CPU_BE + UInt32 k = 0x01020304; + const Byte *p = (const Byte *)&k; + if (p[0] == 4 && p[1] == 3) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; + else if (p[0] != 1 || p[1] != 2) + g_Crc_Algo = 1; + else +#endif // MY_CPU_BE + { + for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--) + { + const UInt32 x = g_CrcTable[(size_t)i - 256]; + g_CrcTable[i] = Z7_BSWAP32(x); + } +#if defined(Z7_CRC_UPDATE_T1_FUNC_NAME) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; +#endif +#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) + g_Crc_Be = 1; +#endif + } + } +#endif // !defined(MY_CPU_LE) + +#ifdef MY_CPU_LE +#ifdef Z7_CRC_HW_USE + if (CPU_IsSupported_CRC32()) + g_Crc_Algo = 0; +#endif // Z7_CRC_HW_USE +#endif // MY_CPU_LE + +#endif // Z7_CRC_NUM_TABLES_USE <= 1 +#endif // g_Crc_Algo was declared +} + +Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo) +{ + if (algo == 0) + return &CrcUpdate; + +#if defined(Z7_CRC_HW_USE) + if (algo == sizeof(CRC_HW_WORD_TYPE) * 8) + { +#ifdef Z7_CRC_HW_FORCE + return &CrcUpdate; +#else + if (g_Crc_Algo == 0) + return &CrcUpdate_HW; +#endif + } +#endif + +#ifndef Z7_CRC_HW_FORCE + if (algo == Z7_CRC_NUM_TABLES_USE) + return + #ifdef Z7_CRC_HW_USE + &CrcUpdate_Base; + #else + &CrcUpdate; + #endif +#endif + + return NULL; +} + +#undef kCrcPoly +#undef Z7_CRC_NUM_TABLES_USE +#undef Z7_CRC_NUM_TABLES_TOTAL +#undef CRC_UPDATE_BYTE_2 +#undef FUNC_NAME_LE_2 +#undef FUNC_NAME_LE_1 +#undef FUNC_NAME_LE +#undef FUNC_NAME_BE_2 +#undef FUNC_NAME_BE_1 +#undef FUNC_NAME_BE + +#undef CRC_HW_UNROLL_BYTES +#undef CRC_HW_WORD_FUNC +#undef CRC_HW_WORD_TYPE diff --git a/src/core/fex/7z_C/7zCrc.h b/src/core/fex/7z_C/7zCrc.h index c596c1a4..ed042358 100644 --- a/src/core/fex/7z_C/7zCrc.h +++ b/src/core/fex/7z_C/7zCrc.h @@ -1,25 +1,28 @@ -/* 7zCrc.h -- CRC32 calculation -2009-11-21 : Igor Pavlov : Public domain */ - -#ifndef __7Z_CRC_H -#define __7Z_CRC_H - -#include "Types.h" - -EXTERN_C_BEGIN - -extern UInt32 g_CrcTable[]; - -/* Call CrcGenerateTable one time before other CRC functions */ -void MY_FAST_CALL CrcGenerateTable(void); - -#define CRC_INIT_VAL 0xFFFFFFFF -#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL) -#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) - -UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void* data, size_t size); -UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size); - -EXTERN_C_END - -#endif +/* 7zCrc.h -- CRC32 calculation +2024-01-22 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_CRC_H +#define ZIP7_INC_7Z_CRC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +extern UInt32 g_CrcTable[]; + +/* Call CrcGenerateTable one time before other CRC functions */ +void Z7_FASTCALL 
CrcGenerateTable(void); + +#define CRC_INIT_VAL 0xFFFFFFFF +#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL) +#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size); +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size); + +typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size); +Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/7zCrcOpt.c b/src/core/fex/7z_C/7zCrcOpt.c index 5fbfe0f4..bbb61f54 100644 --- a/src/core/fex/7z_C/7zCrcOpt.c +++ b/src/core/fex/7z_C/7zCrcOpt.c @@ -1,53 +1,199 @@ -/* 7zCrcOpt.c -- CRC32 calculation -2010-12-01 : Igor Pavlov : Public domain */ - -#include "CpuArch.h" - -#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) - -#ifndef MY_CPU_BE - -UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void* data, size_t size, const UInt32* table) -{ - const Byte* p = (const Byte*)data; - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) - v = CRC_UPDATE_BYTE_2(v, *p); - for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32*)p; - v = table[0x300 + (v & 0xFF)] ^ table[0x200 + ((v >> 8) & 0xFF)] ^ table[0x100 + ((v >> 16) & 0xFF)] ^ table[0x000 + ((v >> 24))]; - } - for (; size > 0; size--, p++) - v = CRC_UPDATE_BYTE_2(v, *p); - return v; -} - -UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void* data, size_t size, const UInt32* table) -{ - return CrcUpdateT4(v, data, size, table); -} - -#endif - -#ifndef MY_CPU_LE - -#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24)) - -UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void* data, size_t size, const UInt32* table) -{ - const Byte* p = (const Byte*)data; - for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) - v = CRC_UPDATE_BYTE_2(v, *p); - v = CRC_UINT32_SWAP(v); - table += 0x100; - for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32*)p; - v = table[0x000 + (v & 0xFF)] ^ table[0x100 + ((v >> 8) & 0xFF)] ^ table[0x200 + ((v >> 16) & 0xFF)] ^ table[0x300 + ((v >> 24))]; - } - table -= 0x100; - v = CRC_UINT32_SWAP(v); - for (; size > 0; size--, p++) - v = CRC_UPDATE_BYTE_2(v, *p); - return v; -} - -#endif +/* 7zCrcOpt.c -- CRC32 calculation (optimized functions) +2023-12-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1 + +// for debug only : define Z7_CRC_DEBUG_BE to test big-endian code in little-endian cpu +// #define Z7_CRC_DEBUG_BE +#ifdef Z7_CRC_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +// the value Z7_CRC_NUM_TABLES_USE must be defined to same value as in 7zCrc.c +#ifdef Z7_CRC_NUM_TABLES +#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES +#else +#define Z7_CRC_NUM_TABLES_USE 12 +#endif + +#if Z7_CRC_NUM_TABLES_USE % 4 || \ + Z7_CRC_NUM_TABLES_USE < 4 * 1 || \ + Z7_CRC_NUM_TABLES_USE > 4 * 6 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif + + +#ifndef MY_CPU_BE + +#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +#define Q(n, d) \ + ( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) + +#define R(a) *((const UInt32 *)(const void *)p + (a)) + +#define CRC_FUNC_PRE_LE2(step) \ +UInt32 
Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) + +#define CRC_FUNC_PRE_LE(step) \ + CRC_FUNC_PRE_LE2(step); \ + CRC_FUNC_PRE_LE2(step) + +CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE) +{ + const Byte *p = (const Byte *)data; + const Byte *lim; + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) + v = CRC_UPDATE_BYTE_2(v, *p); + lim = p + size; + if (size >= Z7_CRC_NUM_TABLES_USE) + { + lim -= Z7_CRC_NUM_TABLES_USE; + do + { + v ^= R(0); + { +#if Z7_CRC_NUM_TABLES_USE == 1 * 4 + v = Q(0, v); +#else +#define U2(r, op) \ + { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } + UInt32 d, x; + U2(1, =) +#if Z7_CRC_NUM_TABLES_USE >= 3 * 4 +#define U(r) U2(r, ^=) + U(2) +#if Z7_CRC_NUM_TABLES_USE >= 4 * 4 + U(3) +#if Z7_CRC_NUM_TABLES_USE >= 5 * 4 + U(4) +#if Z7_CRC_NUM_TABLES_USE >= 6 * 4 + U(5) +#if Z7_CRC_NUM_TABLES_USE >= 7 * 4 +#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif +#endif +#endif +#endif +#endif +#undef U +#undef U2 + v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); +#endif + } + p += Z7_CRC_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC_NUM_TABLES_USE; + } + for (; p < lim; p++) + v = CRC_UPDATE_BYTE_2(v, *p); + return v; +} + +#undef CRC_UPDATE_BYTE_2 +#undef R +#undef Q +#undef CRC_FUNC_PRE_LE +#undef CRC_FUNC_PRE_LE2 + +#endif + + + + +#ifndef MY_CPU_LE + +#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8)) + +#define Q(n, d) \ + ( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) + +#ifdef Z7_CRC_DEBUG_BE + #define R(a) GetBe32a((const UInt32 *)(const void *)p + (a)) +#else + #define R(a) *((const UInt32 *)(const void *)p + (a)) +#endif + + +#define CRC_FUNC_PRE_BE2(step) \ +UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) + +#define CRC_FUNC_PRE_BE(step) \ + CRC_FUNC_PRE_BE2(step); \ + CRC_FUNC_PRE_BE2(step) + +CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE) +{ + const Byte *p = (const Byte *)data; + const Byte *lim; + table += 0x100; + v = Z7_BSWAP32(v); + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) + v = CRC_UPDATE_BYTE_2_BE(v, *p); + lim = p + size; + if (size >= Z7_CRC_NUM_TABLES_USE) + { + lim -= Z7_CRC_NUM_TABLES_USE; + do + { + v ^= R(0); + { +#if Z7_CRC_NUM_TABLES_USE == 1 * 4 + v = Q(0, v); +#else +#define U2(r, op) \ + { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } + UInt32 d, x; + U2(1, =) +#if Z7_CRC_NUM_TABLES_USE >= 3 * 4 +#define U(r) U2(r, ^=) + U(2) +#if Z7_CRC_NUM_TABLES_USE >= 4 * 4 + U(3) +#if Z7_CRC_NUM_TABLES_USE >= 5 * 4 + U(4) +#if Z7_CRC_NUM_TABLES_USE >= 6 * 4 + U(5) +#if Z7_CRC_NUM_TABLES_USE >= 7 * 4 +#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif +#endif +#endif +#endif +#endif +#undef U +#undef U2 + v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); +#endif + } + p += Z7_CRC_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC_NUM_TABLES_USE; + } + for (; p < lim; p++) + v = CRC_UPDATE_BYTE_2_BE(v, *p); + return Z7_BSWAP32(v); +} + +#undef CRC_UPDATE_BYTE_2_BE +#undef R +#undef Q +#undef CRC_FUNC_PRE_BE +#undef CRC_FUNC_PRE_BE2 + +#endif +#undef Z7_CRC_NUM_TABLES_USE +#endif diff --git a/src/core/fex/7z_C/7zDec.c b/src/core/fex/7z_C/7zDec.c index ea709a63..01afc330 100644 --- a/src/core/fex/7z_C/7zDec.c +++ 
b/src/core/fex/7z_C/7zDec.c @@ -1,413 +1,673 @@ -/* 7zDec.c -- Decoding from 7z folder -2010-11-02 : Igor Pavlov : Public domain */ - -#include - -/* #define _7ZIP_PPMD_SUPPPORT */ - -#include "7z.h" - -#include "Bcj2.h" -#include "Bra.h" -#include "CpuArch.h" -#include "Lzma2Dec.h" -#include "LzmaDec.h" -#ifdef _7ZIP_PPMD_SUPPPORT -#include "Ppmd7.h" -#endif - -#define k_Copy 0 -#define k_LZMA2 0x21 -#define k_LZMA 0x30101 -#define k_BCJ 0x03030103 -#define k_PPC 0x03030205 -#define k_ARM 0x03030501 -#define k_ARMT 0x03030701 -#define k_SPARC 0x03030805 -#define k_BCJ2 0x0303011B - -#ifdef _7ZIP_PPMD_SUPPPORT - -#define k_PPMD 0x30401 - -typedef struct -{ - IByteIn p; - const Byte* cur; - const Byte* end; - const Byte* begin; - UInt64 processed; - Bool extra; - SRes res; - ILookInStream* inStream; -} CByteInToLook; - -static Byte ReadByte(void* pp) -{ - CByteInToLook* p = (CByteInToLook*)pp; - if (p->cur != p->end) - return *p->cur++; - if (p->res == SZ_OK) { - size_t size = p->cur - p->begin; - p->processed += size; - p->res = p->inStream->Skip(p->inStream, size); - size = (1 << 25); - p->res = p->inStream->Look(p->inStream, (const void**)&p->begin, &size); - p->cur = p->begin; - p->end = p->begin + size; - if (size != 0) - return *p->cur++; - ; - } - p->extra = True; - return 0; -} - -static SRes SzDecodePpmd(CSzCoderInfo* coder, UInt64 inSize, ILookInStream* inStream, - Byte* outBuffer, SizeT outSize, ISzAlloc* allocMain) -{ - CPpmd7 ppmd; - CByteInToLook s; - SRes res = SZ_OK; - - s.p.Read = ReadByte; - s.inStream = inStream; - s.begin = s.end = s.cur = NULL; - s.extra = False; - s.res = SZ_OK; - s.processed = 0; - - if (coder->Props.size != 5) - return SZ_ERROR_UNSUPPORTED; - - { - unsigned order = coder->Props.data[0]; - UInt32 memSize = GetUi32(coder->Props.data + 1); - if (order < PPMD7_MIN_ORDER || order > PPMD7_MAX_ORDER || memSize < PPMD7_MIN_MEM_SIZE || memSize > PPMD7_MAX_MEM_SIZE) - return SZ_ERROR_UNSUPPORTED; - Ppmd7_Construct(&ppmd); - if (!Ppmd7_Alloc(&ppmd, memSize, allocMain)) - return SZ_ERROR_MEM; - Ppmd7_Init(&ppmd, order); - } - { - CPpmd7z_RangeDec rc; - Ppmd7z_RangeDec_CreateVTable(&rc); - rc.Stream = &s.p; - if (!Ppmd7z_RangeDec_Init(&rc)) - res = SZ_ERROR_DATA; - else if (s.extra) - res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); - else { - SizeT i; - for (i = 0; i < outSize; i++) { - int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.p); - if (s.extra || sym < 0) - break; - outBuffer[i] = (Byte)sym; - } - if (i != outSize) - res = (s.res != SZ_OK ? 
s.res : SZ_ERROR_DATA); - else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc)) - res = SZ_ERROR_DATA; - } - } - Ppmd7_Free(&ppmd, allocMain); - return res; -} - -#endif - -static SRes SzDecodeLzma(CSzCoderInfo* coder, UInt64 inSize, ILookInStream* inStream, - Byte* outBuffer, SizeT outSize, ISzAlloc* allocMain) -{ - CLzmaDec state; - SRes res = SZ_OK; - - LzmaDec_Construct(&state); - RINOK(LzmaDec_AllocateProbs(&state, coder->Props.data, (unsigned)coder->Props.size, allocMain)); - state.dic = outBuffer; - state.dicBufSize = outSize; - LzmaDec_Init(&state); - - for (;;) { - Byte* inBuf = NULL; - size_t lookahead = (1 << 18); - if (lookahead > inSize) - lookahead = (size_t)inSize; - res = inStream->Look((void*)inStream, (const void**)&inBuf, &lookahead); - if (res != SZ_OK) - break; - - { - SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos; - ELzmaStatus status; - res = LzmaDec_DecodeToDic(&state, outSize, inBuf, &inProcessed, LZMA_FINISH_END, &status); - lookahead -= inProcessed; - inSize -= inProcessed; - if (res != SZ_OK) - break; - if (state.dicPos == state.dicBufSize || (inProcessed == 0 && dicPos == state.dicPos)) { - if (state.dicBufSize != outSize || lookahead != 0 || (status != LZMA_STATUS_FINISHED_WITH_MARK && status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)) - res = SZ_ERROR_DATA; - break; - } - res = inStream->Skip((void*)inStream, inProcessed); - if (res != SZ_OK) - break; - } - } - - LzmaDec_FreeProbs(&state, allocMain); - return res; -} - -static SRes SzDecodeLzma2(CSzCoderInfo* coder, UInt64 inSize, ILookInStream* inStream, - Byte* outBuffer, SizeT outSize, ISzAlloc* allocMain) -{ - CLzma2Dec state; - SRes res = SZ_OK; - - Lzma2Dec_Construct(&state); - if (coder->Props.size != 1) - return SZ_ERROR_DATA; - RINOK(Lzma2Dec_AllocateProbs(&state, coder->Props.data[0], allocMain)); - state.decoder.dic = outBuffer; - state.decoder.dicBufSize = outSize; - Lzma2Dec_Init(&state); - - for (;;) { - Byte* inBuf = NULL; - size_t lookahead = (1 << 18); - if (lookahead > inSize) - lookahead = (size_t)inSize; - res = inStream->Look((void*)inStream, (const void**)&inBuf, &lookahead); - if (res != SZ_OK) - break; - - { - SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos; - ELzmaStatus status; - res = Lzma2Dec_DecodeToDic(&state, outSize, inBuf, &inProcessed, LZMA_FINISH_END, &status); - lookahead -= inProcessed; - inSize -= inProcessed; - if (res != SZ_OK) - break; - if (state.decoder.dicPos == state.decoder.dicBufSize || (inProcessed == 0 && dicPos == state.decoder.dicPos)) { - if (state.decoder.dicBufSize != outSize || lookahead != 0 || (status != LZMA_STATUS_FINISHED_WITH_MARK)) - res = SZ_ERROR_DATA; - break; - } - res = inStream->Skip((void*)inStream, inProcessed); - if (res != SZ_OK) - break; - } - } - - Lzma2Dec_FreeProbs(&state, allocMain); - return res; -} - -static SRes SzDecodeCopy(UInt64 inSize, ILookInStream* inStream, Byte* outBuffer) -{ - while (inSize > 0) { - void* inBuf; - size_t curSize = (1 << 18); - if (curSize > inSize) - curSize = (size_t)inSize; - RINOK(inStream->Look((void*)inStream, (const void**)&inBuf, &curSize)); - if (curSize == 0) - return SZ_ERROR_INPUT_EOF; - memcpy(outBuffer, inBuf, curSize); - outBuffer += curSize; - inSize -= curSize; - RINOK(inStream->Skip((void*)inStream, curSize)); - } - return SZ_OK; -} - -static Bool IS_MAIN_METHOD(UInt32 m) -{ - switch (m) { - case k_Copy: - case k_LZMA: - case k_LZMA2: -#ifdef _7ZIP_PPMD_SUPPPORT - case k_PPMD: -#endif - return True; - } - return False; 
-} - -static Bool IS_SUPPORTED_CODER(const CSzCoderInfo* c) -{ - return c->NumInStreams == 1 && c->NumOutStreams == 1 && c->MethodID <= (UInt32)0xFFFFFFFF && IS_MAIN_METHOD((UInt32)c->MethodID); -} - -#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumInStreams == 4 && (c)->NumOutStreams == 1) - -static SRes CheckSupportedFolder(const CSzFolder* f) -{ - if (f->NumCoders < 1 || f->NumCoders > 4) - return SZ_ERROR_UNSUPPORTED; - if (!IS_SUPPORTED_CODER(&f->Coders[0])) - return SZ_ERROR_UNSUPPORTED; - if (f->NumCoders == 1) { - if (f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBindPairs != 0) - return SZ_ERROR_UNSUPPORTED; - return SZ_OK; - } - if (f->NumCoders == 2) { - CSzCoderInfo* c = &f->Coders[1]; - if (c->MethodID > (UInt32)0xFFFFFFFF || c->NumInStreams != 1 || c->NumOutStreams != 1 || f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBindPairs != 1 || f->BindPairs[0].InIndex != 1 || f->BindPairs[0].OutIndex != 0) - return SZ_ERROR_UNSUPPORTED; - switch ((UInt32)c->MethodID) { - case k_BCJ: - case k_ARM: - break; - default: - return SZ_ERROR_UNSUPPORTED; - } - return SZ_OK; - } - if (f->NumCoders == 4) { - if (!IS_SUPPORTED_CODER(&f->Coders[1]) || !IS_SUPPORTED_CODER(&f->Coders[2]) || !IS_BCJ2(&f->Coders[3])) - return SZ_ERROR_UNSUPPORTED; - if (f->NumPackStreams != 4 || f->PackStreams[0] != 2 || f->PackStreams[1] != 6 || f->PackStreams[2] != 1 || f->PackStreams[3] != 0 || f->NumBindPairs != 3 || f->BindPairs[0].InIndex != 5 || f->BindPairs[0].OutIndex != 0 || f->BindPairs[1].InIndex != 4 || f->BindPairs[1].OutIndex != 1 || f->BindPairs[2].InIndex != 3 || f->BindPairs[2].OutIndex != 2) - return SZ_ERROR_UNSUPPORTED; - return SZ_OK; - } - return SZ_ERROR_UNSUPPORTED; -} - -static UInt64 GetSum(const UInt64* values, UInt32 index) -{ - UInt64 sum = 0; - UInt32 i; - for (i = 0; i < index; i++) - sum += values[i]; - return sum; -} - -#define CASE_BRA_CONV(isa) \ - case k_##isa: \ - isa##_Convert(outBuffer, outSize, 0, 0); \ - break; - -static SRes SzFolder_Decode2(const CSzFolder* folder, const UInt64* packSizes, - ILookInStream* inStream, UInt64 startPos, - Byte* outBuffer, SizeT outSize, ISzAlloc* allocMain, - Byte* tempBuf[]) -{ - UInt32 ci; - SizeT tempSizes[3] = { 0, 0, 0 }; - SizeT tempSize3 = 0; - Byte* tempBuf3 = 0; - - RINOK(CheckSupportedFolder(folder)); - - for (ci = 0; ci < folder->NumCoders; ci++) { - CSzCoderInfo* coder = &folder->Coders[ci]; - - if (IS_MAIN_METHOD((UInt32)coder->MethodID)) { - UInt32 si = 0; - UInt64 offset; - UInt64 inSize; - Byte* outBufCur = outBuffer; - SizeT outSizeCur = outSize; - if (folder->NumCoders == 4) { - UInt32 indices[] = { 3, 2, 0 }; - UInt64 unpackSize = folder->UnpackSizes[ci]; - si = indices[ci]; - if (ci < 2) { - Byte* temp; - outSizeCur = (SizeT)unpackSize; - if (outSizeCur != unpackSize) - return SZ_ERROR_MEM; - temp = (Byte*)IAlloc_Alloc(allocMain, outSizeCur); - if (temp == 0 && outSizeCur != 0) - return SZ_ERROR_MEM; - outBufCur = tempBuf[1 - ci] = temp; - tempSizes[1 - ci] = outSizeCur; - } else if (ci == 2) { - if (unpackSize > outSize) /* check it */ - return SZ_ERROR_PARAM; - tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize); - tempSize3 = outSizeCur = (SizeT)unpackSize; - } else - return SZ_ERROR_UNSUPPORTED; - } - offset = GetSum(packSizes, si); - inSize = packSizes[si]; - RINOK(LookInStream_SeekTo(inStream, startPos + offset)); - - if (coder->MethodID == k_Copy) { - if (inSize != outSizeCur) /* check it */ - return SZ_ERROR_DATA; - RINOK(SzDecodeCopy(inSize, inStream, outBufCur)); - } 
else if (coder->MethodID == k_LZMA) { - RINOK(SzDecodeLzma(coder, inSize, inStream, outBufCur, outSizeCur, allocMain)); - } else if (coder->MethodID == k_LZMA2) { - RINOK(SzDecodeLzma2(coder, inSize, inStream, outBufCur, outSizeCur, allocMain)); - } else { -#ifdef _7ZIP_PPMD_SUPPPORT - RINOK(SzDecodePpmd(coder, inSize, inStream, outBufCur, outSizeCur, allocMain)); -#else - return SZ_ERROR_UNSUPPORTED; -#endif - } - } else if (coder->MethodID == k_BCJ2) { - UInt64 offset = GetSum(packSizes, 1); - UInt64 s3Size = packSizes[1]; - SRes res; - if (ci != 3) - return SZ_ERROR_UNSUPPORTED; - RINOK(LookInStream_SeekTo(inStream, startPos + offset)); - tempSizes[2] = (SizeT)s3Size; - if (tempSizes[2] != s3Size) - return SZ_ERROR_MEM; - tempBuf[2] = (Byte*)IAlloc_Alloc(allocMain, tempSizes[2]); - if (tempBuf[2] == 0 && tempSizes[2] != 0) - return SZ_ERROR_MEM; - res = SzDecodeCopy(s3Size, inStream, tempBuf[2]); - RINOK(res) - - res = Bcj2_Decode( - tempBuf3, tempSize3, - tempBuf[0], tempSizes[0], - tempBuf[1], tempSizes[1], - tempBuf[2], tempSizes[2], - outBuffer, outSize); - RINOK(res) - } else { - if (ci != 1) - return SZ_ERROR_UNSUPPORTED; - switch (coder->MethodID) { - case k_BCJ: { - UInt32 state; - x86_Convert_Init(state); - x86_Convert(outBuffer, outSize, 0, &state, 0); - break; - } - CASE_BRA_CONV(ARM) - default: - return SZ_ERROR_UNSUPPORTED; - } - } - } - return SZ_OK; -} - -SRes SzFolder_Decode(const CSzFolder* folder, const UInt64* packSizes, - ILookInStream* inStream, UInt64 startPos, - Byte* outBuffer, size_t outSize, ISzAlloc* allocMain) -{ - Byte* tempBuf[3] = { 0, 0, 0 }; - int i; - SRes res = SzFolder_Decode2(folder, packSizes, inStream, startPos, - outBuffer, (SizeT)outSize, allocMain, tempBuf); - for (i = 0; i < 3; i++) - IAlloc_Free(allocMain, tempBuf[i]); - return res; -} +/* 7zDec.c -- Decoding from 7z folder +: Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #define Z7_PPMD_SUPPORT */ + +#include "7z.h" +#include "7zCrc.h" + +#include "Bcj2.h" +#include "Bra.h" +#include "CpuArch.h" +#include "Delta.h" +#include "LzmaDec.h" +#include "Lzma2Dec.h" +#ifdef Z7_PPMD_SUPPORT +#include "Ppmd7.h" +#endif + +#define k_Copy 0 +#ifndef Z7_NO_METHOD_LZMA2 +#define k_LZMA2 0x21 +#endif +#define k_LZMA 0x30101 +#define k_BCJ2 0x303011B + +#if !defined(Z7_NO_METHODS_FILTERS) +#define Z7_USE_BRANCH_FILTER +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARM64) +#define Z7_USE_FILTER_ARM64 +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARM64 0xa +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARMT) +#define Z7_USE_FILTER_ARMT +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARMT 0x3030701 +#endif + +#ifndef Z7_NO_METHODS_FILTERS +#define k_Delta 3 +#define k_RISCV 0xb +#define k_BCJ 0x3030103 +#define k_PPC 0x3030205 +#define k_IA64 0x3030401 +#define k_ARM 0x3030501 +#define k_SPARC 0x3030805 +#endif + +#ifdef Z7_PPMD_SUPPORT + +#define k_PPMD 0x30401 + +typedef struct +{ + IByteIn vt; + const Byte *cur; + const Byte *end; + const Byte *begin; + UInt64 processed; + BoolInt extra; + SRes res; + ILookInStreamPtr inStream; +} CByteInToLook; + +static Byte ReadByte(IByteInPtr pp) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CByteInToLook) + if (p->cur != p->end) + return *p->cur++; + if (p->res == SZ_OK) + { + size_t size = (size_t)(p->cur - p->begin); + p->processed += size; + p->res = 
ILookInStream_Skip(p->inStream, size); + size = (1 << 25); + p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size); + p->cur = p->begin; + p->end = p->begin + size; + if (size != 0) + return *p->cur++; + } + p->extra = True; + return 0; +} + +static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CPpmd7 ppmd; + CByteInToLook s; + SRes res = SZ_OK; + + s.vt.Read = ReadByte; + s.inStream = inStream; + s.begin = s.end = s.cur = NULL; + s.extra = False; + s.res = SZ_OK; + s.processed = 0; + + if (propsSize != 5) + return SZ_ERROR_UNSUPPORTED; + + { + unsigned order = props[0]; + UInt32 memSize = GetUi32(props + 1); + if (order < PPMD7_MIN_ORDER || + order > PPMD7_MAX_ORDER || + memSize < PPMD7_MIN_MEM_SIZE || + memSize > PPMD7_MAX_MEM_SIZE) + return SZ_ERROR_UNSUPPORTED; + Ppmd7_Construct(&ppmd); + if (!Ppmd7_Alloc(&ppmd, memSize, allocMain)) + return SZ_ERROR_MEM; + Ppmd7_Init(&ppmd, order); + } + { + ppmd.rc.dec.Stream = &s.vt; + if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec)) + res = SZ_ERROR_DATA; + else if (!s.extra) + { + Byte *buf = outBuffer; + const Byte *lim = buf + outSize; + for (; buf != lim; buf++) + { + int sym = Ppmd7z_DecodeSymbol(&ppmd); + if (s.extra || sym < 0) + break; + *buf = (Byte)sym; + } + if (buf != lim) + res = SZ_ERROR_DATA; + else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) + { + /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */ + res = SZ_ERROR_DATA; + } + } + if (s.extra) + res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); + else if (s.processed + (size_t)(s.cur - s.begin) != inSize) + res = SZ_ERROR_DATA; + } + Ppmd7_Free(&ppmd, allocMain); + return res; +} + +#endif + + +static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CLzmaDec state; + SRes res = SZ_OK; + + LzmaDec_CONSTRUCT(&state) + RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain)) + state.dic = outBuffer; + state.dicBufSize = outSize; + LzmaDec_Init(&state); + + for (;;) + { + const void *inBuf = NULL; + size_t lookahead = (1 << 18); + if (lookahead > inSize) + lookahead = (size_t)inSize; + res = ILookInStream_Look(inStream, &inBuf, &lookahead); + if (res != SZ_OK) + break; + + { + SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos; + ELzmaStatus status; + res = LzmaDec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status); + lookahead -= inProcessed; + inSize -= inProcessed; + if (res != SZ_OK) + break; + + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + { + if (outSize != state.dicPos || inSize != 0) + res = SZ_ERROR_DATA; + break; + } + + if (outSize == state.dicPos && inSize == 0 && status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK) + break; + + if (inProcessed == 0 && dicPos == state.dicPos) + { + res = SZ_ERROR_DATA; + break; + } + + res = ILookInStream_Skip(inStream, inProcessed); + if (res != SZ_OK) + break; + } + } + + LzmaDec_FreeProbs(&state, allocMain); + return res; +} + + +#ifndef Z7_NO_METHOD_LZMA2 + +static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CLzma2Dec state; + SRes res = SZ_OK; + + Lzma2Dec_CONSTRUCT(&state) + if (propsSize != 1) + return SZ_ERROR_DATA; + RINOK(Lzma2Dec_AllocateProbs(&state, props[0], 
allocMain)) + state.decoder.dic = outBuffer; + state.decoder.dicBufSize = outSize; + Lzma2Dec_Init(&state); + + for (;;) + { + const void *inBuf = NULL; + size_t lookahead = (1 << 18); + if (lookahead > inSize) + lookahead = (size_t)inSize; + res = ILookInStream_Look(inStream, &inBuf, &lookahead); + if (res != SZ_OK) + break; + + { + SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos; + ELzmaStatus status; + res = Lzma2Dec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status); + lookahead -= inProcessed; + inSize -= inProcessed; + if (res != SZ_OK) + break; + + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + { + if (outSize != state.decoder.dicPos || inSize != 0) + res = SZ_ERROR_DATA; + break; + } + + if (inProcessed == 0 && dicPos == state.decoder.dicPos) + { + res = SZ_ERROR_DATA; + break; + } + + res = ILookInStream_Skip(inStream, inProcessed); + if (res != SZ_OK) + break; + } + } + + Lzma2Dec_FreeProbs(&state, allocMain); + return res; +} + +#endif + + +static SRes SzDecodeCopy(UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer) +{ + while (inSize > 0) + { + const void *inBuf; + size_t curSize = (1 << 18); + if (curSize > inSize) + curSize = (size_t)inSize; + RINOK(ILookInStream_Look(inStream, &inBuf, &curSize)) + if (curSize == 0) + return SZ_ERROR_INPUT_EOF; + memcpy(outBuffer, inBuf, curSize); + outBuffer += curSize; + inSize -= curSize; + RINOK(ILookInStream_Skip(inStream, curSize)) + } + return SZ_OK; +} + +static BoolInt IS_MAIN_METHOD(UInt32 m) +{ + switch (m) + { + case k_Copy: + case k_LZMA: + #ifndef Z7_NO_METHOD_LZMA2 + case k_LZMA2: + #endif + #ifdef Z7_PPMD_SUPPORT + case k_PPMD: + #endif + return True; + default: + return False; + } +} + +static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c) +{ + return + c->NumStreams == 1 + /* && c->MethodID <= (UInt32)0xFFFFFFFF */ + && IS_MAIN_METHOD((UInt32)c->MethodID); +} + +#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumStreams == 4) + +static SRes CheckSupportedFolder(const CSzFolder *f) +{ + if (f->NumCoders < 1 || f->NumCoders > 4) + return SZ_ERROR_UNSUPPORTED; + if (!IS_SUPPORTED_CODER(&f->Coders[0])) + return SZ_ERROR_UNSUPPORTED; + if (f->NumCoders == 1) + { + if (f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBonds != 0) + return SZ_ERROR_UNSUPPORTED; + return SZ_OK; + } + + + #if defined(Z7_USE_BRANCH_FILTER) + + if (f->NumCoders == 2) + { + const CSzCoderInfo *c = &f->Coders[1]; + if ( + /* c->MethodID > (UInt32)0xFFFFFFFF || */ + c->NumStreams != 1 + || f->NumPackStreams != 1 + || f->PackStreams[0] != 0 + || f->NumBonds != 1 + || f->Bonds[0].InIndex != 1 + || f->Bonds[0].OutIndex != 0) + return SZ_ERROR_UNSUPPORTED; + switch ((UInt32)c->MethodID) + { + #if !defined(Z7_NO_METHODS_FILTERS) + case k_Delta: + case k_BCJ: + case k_PPC: + case k_IA64: + case k_SPARC: + case k_ARM: + case k_RISCV: + #endif + #ifdef Z7_USE_FILTER_ARM64 + case k_ARM64: + #endif + #ifdef Z7_USE_FILTER_ARMT + case k_ARMT: + #endif + break; + default: + return SZ_ERROR_UNSUPPORTED; + } + return SZ_OK; + } + + #endif + + + if (f->NumCoders == 4) + { + if (!IS_SUPPORTED_CODER(&f->Coders[1]) + || !IS_SUPPORTED_CODER(&f->Coders[2]) + || !IS_BCJ2(&f->Coders[3])) + return SZ_ERROR_UNSUPPORTED; + if (f->NumPackStreams != 4 + || f->PackStreams[0] != 2 + || f->PackStreams[1] != 6 + || f->PackStreams[2] != 1 + || f->PackStreams[3] != 0 + || f->NumBonds != 3 + || f->Bonds[0].InIndex != 5 || f->Bonds[0].OutIndex != 0 + || f->Bonds[1].InIndex != 4 || f->Bonds[1].OutIndex != 1 + 
|| f->Bonds[2].InIndex != 3 || f->Bonds[2].OutIndex != 2) + return SZ_ERROR_UNSUPPORTED; + return SZ_OK; + } + + return SZ_ERROR_UNSUPPORTED; +} + + + + + + +static SRes SzFolder_Decode2(const CSzFolder *folder, + const Byte *propsData, + const UInt64 *unpackSizes, + const UInt64 *packPositions, + ILookInStreamPtr inStream, UInt64 startPos, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain, + Byte *tempBuf[]) +{ + UInt32 ci; + SizeT tempSizes[3] = { 0, 0, 0}; + SizeT tempSize3 = 0; + Byte *tempBuf3 = 0; + + RINOK(CheckSupportedFolder(folder)) + + for (ci = 0; ci < folder->NumCoders; ci++) + { + const CSzCoderInfo *coder = &folder->Coders[ci]; + + if (IS_MAIN_METHOD((UInt32)coder->MethodID)) + { + UInt32 si = 0; + UInt64 offset; + UInt64 inSize; + Byte *outBufCur = outBuffer; + SizeT outSizeCur = outSize; + if (folder->NumCoders == 4) + { + const UInt32 indices[] = { 3, 2, 0 }; + const UInt64 unpackSize = unpackSizes[ci]; + si = indices[ci]; + if (ci < 2) + { + Byte *temp; + outSizeCur = (SizeT)unpackSize; + if (outSizeCur != unpackSize) + return SZ_ERROR_MEM; + temp = (Byte *)ISzAlloc_Alloc(allocMain, outSizeCur); + if (!temp && outSizeCur != 0) + return SZ_ERROR_MEM; + outBufCur = tempBuf[1 - ci] = temp; + tempSizes[1 - ci] = outSizeCur; + } + else if (ci == 2) + { + if (unpackSize > outSize) /* check it */ + return SZ_ERROR_PARAM; + tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize); + tempSize3 = outSizeCur = (SizeT)unpackSize; + } + else + return SZ_ERROR_UNSUPPORTED; + } + offset = packPositions[si]; + inSize = packPositions[(size_t)si + 1] - offset; + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) + + if (coder->MethodID == k_Copy) + { + if (inSize != outSizeCur) /* check it */ + return SZ_ERROR_DATA; + RINOK(SzDecodeCopy(inSize, inStream, outBufCur)) + } + else if (coder->MethodID == k_LZMA) + { + RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #ifndef Z7_NO_METHOD_LZMA2 + else if (coder->MethodID == k_LZMA2) + { + RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #endif + #ifdef Z7_PPMD_SUPPORT + else if (coder->MethodID == k_PPMD) + { + RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #endif + else + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->MethodID == k_BCJ2) + { + const UInt64 offset = packPositions[1]; + const UInt64 s3Size = packPositions[2] - offset; + + if (ci != 3) + return SZ_ERROR_UNSUPPORTED; + + tempSizes[2] = (SizeT)s3Size; + if (tempSizes[2] != s3Size) + return SZ_ERROR_MEM; + tempBuf[2] = (Byte *)ISzAlloc_Alloc(allocMain, tempSizes[2]); + if (!tempBuf[2] && tempSizes[2] != 0) + return SZ_ERROR_MEM; + + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) + RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2])) + + if ((tempSizes[0] & 3) != 0 || + (tempSizes[1] & 3) != 0 || + tempSize3 + tempSizes[0] + tempSizes[1] != outSize) + return SZ_ERROR_DATA; + + { + CBcj2Dec p; + + p.bufs[0] = tempBuf3; p.lims[0] = tempBuf3 + tempSize3; + p.bufs[1] = tempBuf[0]; p.lims[1] = tempBuf[0] + tempSizes[0]; + p.bufs[2] = tempBuf[1]; p.lims[2] = tempBuf[1] + tempSizes[1]; + p.bufs[3] = tempBuf[2]; p.lims[3] = tempBuf[2] + tempSizes[2]; + + p.dest = outBuffer; + p.destLim = outBuffer + outSize; + + Bcj2Dec_Init(&p); + RINOK(Bcj2Dec_Decode(&p)) + + { + unsigned i; + for (i = 0; i < 4; i++) + if (p.bufs[i] != 
p.lims[i]) + return SZ_ERROR_DATA; + if (p.dest != p.destLim || !Bcj2Dec_IsMaybeFinished(&p)) + return SZ_ERROR_DATA; + } + } + } +#if defined(Z7_USE_BRANCH_FILTER) + else if (ci == 1) + { +#if !defined(Z7_NO_METHODS_FILTERS) + if (coder->MethodID == k_Delta) + { + if (coder->PropsSize != 1) + return SZ_ERROR_UNSUPPORTED; + { + Byte state[DELTA_STATE_SIZE]; + Delta_Init(state); + Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize); + } + continue; + } +#endif + +#ifdef Z7_USE_FILTER_ARM64 + if (coder->MethodID == k_ARM64) + { + UInt32 pc = 0; + if (coder->PropsSize == 4) + { + pc = GetUi32(propsData + coder->PropsOffset); + if (pc & 3) + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc); + continue; + } +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) + if (coder->MethodID == k_RISCV) + { + UInt32 pc = 0; + if (coder->PropsSize == 4) + { + pc = GetUi32(propsData + coder->PropsOffset); + if (pc & 1) + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + z7_BranchConv_RISCV_Dec(outBuffer, outSize, pc); + continue; + } +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) + { + if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + #define CASE_BRA_CONV(isa) case k_ ## isa: Z7_BRANCH_CONV_DEC(isa)(outBuffer, outSize, 0); break; // pc = 0; + switch (coder->MethodID) + { + #if !defined(Z7_NO_METHODS_FILTERS) + case k_BCJ: + { + UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0 + break; + } + case k_PPC: Z7_BRANCH_CONV_DEC_2(BranchConv_PPC)(outBuffer, outSize, 0); break; // pc = 0; + // CASE_BRA_CONV(PPC) + CASE_BRA_CONV(IA64) + CASE_BRA_CONV(SPARC) + CASE_BRA_CONV(ARM) + #endif + #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) + CASE_BRA_CONV(ARMT) + #endif + default: + return SZ_ERROR_UNSUPPORTED; + } + continue; + } +#endif + } // (c == 1) +#endif // Z7_USE_BRANCH_FILTER + else + return SZ_ERROR_UNSUPPORTED; + } + + return SZ_OK; +} + + +SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, + ILookInStreamPtr inStream, UInt64 startPos, + Byte *outBuffer, size_t outSize, + ISzAllocPtr allocMain) +{ + SRes res; + CSzFolder folder; + CSzData sd; + + const Byte *data = p->CodersData + p->FoCodersOffsets[folderIndex]; + sd.Data = data; + sd.Size = p->FoCodersOffsets[(size_t)folderIndex + 1] - p->FoCodersOffsets[folderIndex]; + + res = SzGetNextFolderItem(&folder, &sd); + + if (res != SZ_OK) + return res; + + if (sd.Size != 0 + || folder.UnpackStream != p->FoToMainUnpackSizeIndex[folderIndex] + || outSize != SzAr_GetFolderUnpackSize(p, folderIndex)) + return SZ_ERROR_FAIL; + { + unsigned i; + Byte *tempBuf[3] = { 0, 0, 0}; + + res = SzFolder_Decode2(&folder, data, + &p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex]], + p->PackPositions + p->FoStartPackStreamIndex[folderIndex], + inStream, startPos, + outBuffer, (SizeT)outSize, allocMain, tempBuf); + + for (i = 0; i < 3; i++) + ISzAlloc_Free(allocMain, tempBuf[i]); + + if (res == SZ_OK) + if (SzBitWithVals_Check(&p->FolderCRCs, folderIndex)) + if (CrcCalc(outBuffer, outSize) != p->FolderCRCs.Vals[folderIndex]) + res = SZ_ERROR_CRC; + + return res; + } +} diff --git a/src/core/fex/7z_C/7zIn.c b/src/core/fex/7z_C/7zIn.c deleted file mode 100644 index cd660a97..00000000 --- a/src/core/fex/7z_C/7zIn.c +++ /dev/null @@ -1,1293 +0,0 @@ -/* 7zIn.c -- 
7z Input functions -2010-10-29 : Igor Pavlov : Public domain */ - -#include - -#include "7z.h" -#include "7zCrc.h" -#include "CpuArch.h" - -Byte k7zSignature[k7zSignatureSize] = { '7', 'z', 0xBC, 0xAF, 0x27, 0x1C }; - -#define RINOM(x) \ - { \ - if ((x) == 0) \ - return SZ_ERROR_MEM; \ - } - -#define NUM_FOLDER_CODERS_MAX 32 -#define NUM_CODER_STREAMS_MAX 32 - -void SzCoderInfo_Init(CSzCoderInfo* p) -{ - Buf_Init(&p->Props); -} - -void SzCoderInfo_Free(CSzCoderInfo* p, ISzAlloc* alloc) -{ - Buf_Free(&p->Props, alloc); - SzCoderInfo_Init(p); -} - -void SzFolder_Init(CSzFolder* p) -{ - p->Coders = 0; - p->BindPairs = 0; - p->PackStreams = 0; - p->UnpackSizes = 0; - p->NumCoders = 0; - p->NumBindPairs = 0; - p->NumPackStreams = 0; - p->UnpackCRCDefined = 0; - p->UnpackCRC = 0; - p->NumUnpackStreams = 0; -} - -void SzFolder_Free(CSzFolder* p, ISzAlloc* alloc) -{ - UInt32 i; - if (p->Coders) - for (i = 0; i < p->NumCoders; i++) - SzCoderInfo_Free(&p->Coders[i], alloc); - IAlloc_Free(alloc, p->Coders); - IAlloc_Free(alloc, p->BindPairs); - IAlloc_Free(alloc, p->PackStreams); - IAlloc_Free(alloc, p->UnpackSizes); - SzFolder_Init(p); -} - -UInt32 SzFolder_GetNumOutStreams(CSzFolder* p) -{ - UInt32 result = 0; - UInt32 i; - for (i = 0; i < p->NumCoders; i++) - result += p->Coders[i].NumOutStreams; - return result; -} - -int SzFolder_FindBindPairForInStream(CSzFolder* p, UInt32 inStreamIndex) -{ - UInt32 i; - for (i = 0; i < p->NumBindPairs; i++) - if (p->BindPairs[i].InIndex == inStreamIndex) - return i; - return -1; -} - -int SzFolder_FindBindPairForOutStream(CSzFolder* p, UInt32 outStreamIndex) -{ - UInt32 i; - for (i = 0; i < p->NumBindPairs; i++) - if (p->BindPairs[i].OutIndex == outStreamIndex) - return i; - return -1; -} - -UInt64 SzFolder_GetUnpackSize(CSzFolder* p) -{ - int i = (int)SzFolder_GetNumOutStreams(p); - if (i == 0) - return 0; - for (i--; i >= 0; i--) - if (SzFolder_FindBindPairForOutStream(p, i) < 0) - return p->UnpackSizes[i]; - /* throw 1; */ - return 0; -} - -void SzFile_Init(CSzFileItem* p) -{ - p->HasStream = 1; - p->IsDir = 0; - p->IsAnti = 0; - p->CrcDefined = 0; - p->MTimeDefined = 0; -} - -void SzAr_Init(CSzAr* p) -{ - p->PackSizes = 0; - p->PackCRCsDefined = 0; - p->PackCRCs = 0; - p->Folders = 0; - p->Files = 0; - p->NumPackStreams = 0; - p->NumFolders = 0; - p->NumFiles = 0; -} - -void SzAr_Free(CSzAr* p, ISzAlloc* alloc) -{ - UInt32 i; - if (p->Folders) - for (i = 0; i < p->NumFolders; i++) - SzFolder_Free(&p->Folders[i], alloc); - - IAlloc_Free(alloc, p->PackSizes); - IAlloc_Free(alloc, p->PackCRCsDefined); - IAlloc_Free(alloc, p->PackCRCs); - IAlloc_Free(alloc, p->Folders); - IAlloc_Free(alloc, p->Files); - SzAr_Init(p); -} - -void SzArEx_Init(CSzArEx* p) -{ - SzAr_Init(&p->db); - p->FolderStartPackStreamIndex = 0; - p->PackStreamStartPositions = 0; - p->FolderStartFileIndex = 0; - p->FileIndexToFolderIndexMap = 0; - p->FileNameOffsets = 0; - Buf_Init(&p->FileNames); -} - -void SzArEx_Free(CSzArEx* p, ISzAlloc* alloc) -{ - IAlloc_Free(alloc, p->FolderStartPackStreamIndex); - IAlloc_Free(alloc, p->PackStreamStartPositions); - IAlloc_Free(alloc, p->FolderStartFileIndex); - IAlloc_Free(alloc, p->FileIndexToFolderIndexMap); - - IAlloc_Free(alloc, p->FileNameOffsets); - Buf_Free(&p->FileNames, alloc); - - SzAr_Free(&p->db, alloc); - SzArEx_Init(p); -} - -/* -UInt64 GetFolderPackStreamSize(int folderIndex, int streamIndex) const -{ - return PackSizes[FolderStartPackStreamIndex[folderIndex] + streamIndex]; -} - -UInt64 GetFilePackSize(int fileIndex) const -{ - int 
folderIndex = FileIndexToFolderIndexMap[fileIndex]; - if (folderIndex >= 0) - { - const CSzFolder &folderInfo = Folders[folderIndex]; - if (FolderStartFileIndex[folderIndex] == fileIndex) - return GetFolderFullPackSize(folderIndex); - } - return 0; -} -*/ - -#define MY_ALLOC(T, p, size, alloc) \ - { \ - if ((size) == 0) \ - p = 0; \ - else if ((p = (T*)IAlloc_Alloc(alloc, (size) * sizeof(T))) == 0) \ - return SZ_ERROR_MEM; \ - } - -static SRes SzArEx_Fill(CSzArEx* p, ISzAlloc* alloc) -{ - UInt32 startPos = 0; - UInt64 startPosSize = 0; - UInt32 i; - UInt32 folderIndex = 0; - UInt32 indexInFolder = 0; - MY_ALLOC(UInt32, p->FolderStartPackStreamIndex, p->db.NumFolders, alloc); - for (i = 0; i < p->db.NumFolders; i++) { - p->FolderStartPackStreamIndex[i] = startPos; - startPos += p->db.Folders[i].NumPackStreams; - } - - MY_ALLOC(UInt64, p->PackStreamStartPositions, p->db.NumPackStreams, alloc); - - for (i = 0; i < p->db.NumPackStreams; i++) { - p->PackStreamStartPositions[i] = startPosSize; - startPosSize += p->db.PackSizes[i]; - } - - MY_ALLOC(UInt32, p->FolderStartFileIndex, p->db.NumFolders, alloc); - MY_ALLOC(UInt32, p->FileIndexToFolderIndexMap, p->db.NumFiles, alloc); - - for (i = 0; i < p->db.NumFiles; i++) { - CSzFileItem* file = p->db.Files + i; - int emptyStream = !file->HasStream; - if (emptyStream && indexInFolder == 0) { - p->FileIndexToFolderIndexMap[i] = (UInt32)-1; - continue; - } - if (indexInFolder == 0) { - /* - v3.13 incorrectly worked with empty folders - v4.07: Loop for skipping empty folders - */ - for (;;) { - if (folderIndex >= p->db.NumFolders) - return SZ_ERROR_ARCHIVE; - p->FolderStartFileIndex[folderIndex] = i; - if (p->db.Folders[folderIndex].NumUnpackStreams != 0) - break; - folderIndex++; - } - } - p->FileIndexToFolderIndexMap[i] = folderIndex; - if (emptyStream) - continue; - indexInFolder++; - if (indexInFolder >= p->db.Folders[folderIndex].NumUnpackStreams) { - folderIndex++; - indexInFolder = 0; - } - } - return SZ_OK; -} - -UInt64 SzArEx_GetFolderStreamPos(const CSzArEx* p, UInt32 folderIndex, UInt32 indexInFolder) -{ - return p->dataPos + p->PackStreamStartPositions[p->FolderStartPackStreamIndex[folderIndex] + indexInFolder]; -} - -int SzArEx_GetFolderFullPackSize(const CSzArEx* p, UInt32 folderIndex, UInt64* resSize) -{ - UInt32 packStreamIndex = p->FolderStartPackStreamIndex[folderIndex]; - CSzFolder* folder = p->db.Folders + folderIndex; - UInt64 size = 0; - UInt32 i; - for (i = 0; i < folder->NumPackStreams; i++) { - UInt64 t = size + p->db.PackSizes[packStreamIndex + i]; - if (t < size) /* check it */ - return SZ_ERROR_FAIL; - size = t; - } - *resSize = size; - return SZ_OK; -} - -/* -SRes SzReadTime(const CObjectVector &dataVector, - CObjectVector &files, UInt64 type) -{ - CBoolVector boolVector; - RINOK(ReadBoolVector2(files.Size(), boolVector)) - - CStreamSwitch streamSwitch; - RINOK(streamSwitch.Set(this, &dataVector)); - - for (int i = 0; i < files.Size(); i++) - { - CSzFileItem &file = files[i]; - CArchiveFileTime fileTime; - bool defined = boolVector[i]; - if (defined) - { - UInt32 low, high; - RINOK(SzReadUInt32(low)); - RINOK(SzReadUInt32(high)); - fileTime.dwLowDateTime = low; - fileTime.dwHighDateTime = high; - } - switch(type) - { - case k7zIdCTime: file.IsCTimeDefined = defined; if (defined) file.CTime = fileTime; break; - case k7zIdATime: file.IsATimeDefined = defined; if (defined) file.ATime = fileTime; break; - case k7zIdMTime: file.IsMTimeDefined = defined; if (defined) file.MTime = fileTime; break; - } - } - return SZ_OK; -} -*/ - 
-static int TestSignatureCandidate(Byte* testBytes) -{ - size_t i; - for (i = 0; i < k7zSignatureSize; i++) - if (testBytes[i] != k7zSignature[i]) - return 0; - return 1; -} - -typedef struct _CSzState { - Byte* Data; - size_t Size; -} CSzData; - -static SRes SzReadByte(CSzData* sd, Byte* b) -{ - if (sd->Size == 0) - return SZ_ERROR_ARCHIVE; - sd->Size--; - *b = *sd->Data++; - return SZ_OK; -} - -static SRes SzReadBytes(CSzData* sd, Byte* data, size_t size) -{ - size_t i; - for (i = 0; i < size; i++) { - RINOK(SzReadByte(sd, data + i)); - } - return SZ_OK; -} - -static SRes SzReadUInt32(CSzData* sd, UInt32* value) -{ - int i; - *value = 0; - for (i = 0; i < 4; i++) { - Byte b; - RINOK(SzReadByte(sd, &b)); - *value |= ((UInt32)(b) << (8 * i)); - } - return SZ_OK; -} - -static SRes SzReadNumber(CSzData* sd, UInt64* value) -{ - Byte firstByte; - Byte mask = 0x80; - int i; - RINOK(SzReadByte(sd, &firstByte)); - *value = 0; - for (i = 0; i < 8; i++) { - Byte b; - if ((firstByte & mask) == 0) { - UInt64 highPart = firstByte & (mask - 1); - *value += (highPart << (8 * i)); - return SZ_OK; - } - RINOK(SzReadByte(sd, &b)); - *value |= ((UInt64)b << (8 * i)); - mask >>= 1; - } - return SZ_OK; -} - -static SRes SzReadNumber32(CSzData* sd, UInt32* value) -{ - UInt64 value64; - RINOK(SzReadNumber(sd, &value64)); - if (value64 >= 0x80000000) - return SZ_ERROR_UNSUPPORTED; - if (value64 >= ((UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 2))) - return SZ_ERROR_UNSUPPORTED; - *value = (UInt32)value64; - return SZ_OK; -} - -static SRes SzReadID(CSzData* sd, UInt64* value) -{ - return SzReadNumber(sd, value); -} - -static SRes SzSkeepDataSize(CSzData* sd, UInt64 size) -{ - if (size > sd->Size) - return SZ_ERROR_ARCHIVE; - sd->Size -= (size_t)size; - sd->Data += (size_t)size; - return SZ_OK; -} - -static SRes SzSkeepData(CSzData* sd) -{ - UInt64 size; - RINOK(SzReadNumber(sd, &size)); - return SzSkeepDataSize(sd, size); -} - -static SRes SzReadArchiveProperties(CSzData* sd) -{ - for (;;) { - UInt64 type; - RINOK(SzReadID(sd, &type)); - if (type == k7zIdEnd) - break; - SzSkeepData(sd); - } - return SZ_OK; -} - -static SRes SzWaitAttribute(CSzData* sd, UInt64 attribute) -{ - for (;;) { - UInt64 type; - RINOK(SzReadID(sd, &type)); - if (type == attribute) - return SZ_OK; - if (type == k7zIdEnd) - return SZ_ERROR_ARCHIVE; - RINOK(SzSkeepData(sd)); - } -} - -static SRes SzReadBoolVector(CSzData* sd, size_t numItems, Byte** v, ISzAlloc* alloc) -{ - Byte b = 0; - Byte mask = 0; - size_t i; - MY_ALLOC(Byte, *v, numItems, alloc); - for (i = 0; i < numItems; i++) { - if (mask == 0) { - RINOK(SzReadByte(sd, &b)); - mask = 0x80; - } - (*v)[i] = (Byte)(((b & mask) != 0) ? 
1 : 0); - mask >>= 1; - } - return SZ_OK; -} - -static SRes SzReadBoolVector2(CSzData* sd, size_t numItems, Byte** v, ISzAlloc* alloc) -{ - Byte allAreDefined; - size_t i; - RINOK(SzReadByte(sd, &allAreDefined)); - if (allAreDefined == 0) - return SzReadBoolVector(sd, numItems, v, alloc); - MY_ALLOC(Byte, *v, numItems, alloc); - for (i = 0; i < numItems; i++) - (*v)[i] = 1; - return SZ_OK; -} - -static SRes SzReadHashDigests( - CSzData* sd, - size_t numItems, - Byte** digestsDefined, - UInt32** digests, - ISzAlloc* alloc) -{ - size_t i; - RINOK(SzReadBoolVector2(sd, numItems, digestsDefined, alloc)); - MY_ALLOC(UInt32, *digests, numItems, alloc); - for (i = 0; i < numItems; i++) - if ((*digestsDefined)[i]) { - RINOK(SzReadUInt32(sd, (*digests) + i)); - } - return SZ_OK; -} - -static SRes SzReadPackInfo( - CSzData* sd, - UInt64* dataOffset, - UInt32* numPackStreams, - UInt64** packSizes, - Byte** packCRCsDefined, - UInt32** packCRCs, - ISzAlloc* alloc) -{ - UInt32 i; - RINOK(SzReadNumber(sd, dataOffset)); - RINOK(SzReadNumber32(sd, numPackStreams)); - - RINOK(SzWaitAttribute(sd, k7zIdSize)); - - MY_ALLOC(UInt64, *packSizes, (size_t)*numPackStreams, alloc); - - for (i = 0; i < *numPackStreams; i++) { - RINOK(SzReadNumber(sd, (*packSizes) + i)); - } - - for (;;) { - UInt64 type; - RINOK(SzReadID(sd, &type)); - if (type == k7zIdEnd) - break; - if (type == k7zIdCRC) { - RINOK(SzReadHashDigests(sd, (size_t)*numPackStreams, packCRCsDefined, packCRCs, alloc)); - continue; - } - RINOK(SzSkeepData(sd)); - } - if (*packCRCsDefined == 0) { - MY_ALLOC(Byte, *packCRCsDefined, (size_t)*numPackStreams, alloc); - MY_ALLOC(UInt32, *packCRCs, (size_t)*numPackStreams, alloc); - for (i = 0; i < *numPackStreams; i++) { - (*packCRCsDefined)[i] = 0; - (*packCRCs)[i] = 0; - } - } - return SZ_OK; -} - -static SRes SzReadSwitch(CSzData* sd) -{ - Byte external; - RINOK(SzReadByte(sd, &external)); - return (external == 0) ? 
SZ_OK : SZ_ERROR_UNSUPPORTED; -} - -static SRes SzGetNextFolderItem(CSzData* sd, CSzFolder* folder, ISzAlloc* alloc) -{ - UInt32 numCoders, numBindPairs, numPackStreams, i; - UInt32 numInStreams = 0, numOutStreams = 0; - - RINOK(SzReadNumber32(sd, &numCoders)); - if (numCoders > NUM_FOLDER_CODERS_MAX) - return SZ_ERROR_UNSUPPORTED; - folder->NumCoders = numCoders; - - MY_ALLOC(CSzCoderInfo, folder->Coders, (size_t)numCoders, alloc); - - for (i = 0; i < numCoders; i++) - SzCoderInfo_Init(folder->Coders + i); - - for (i = 0; i < numCoders; i++) { - Byte mainByte; - CSzCoderInfo* coder = folder->Coders + i; - { - unsigned idSize, j; - Byte longID[15]; - RINOK(SzReadByte(sd, &mainByte)); - idSize = (unsigned)(mainByte & 0xF); - RINOK(SzReadBytes(sd, longID, idSize)); - if (idSize > sizeof(coder->MethodID)) - return SZ_ERROR_UNSUPPORTED; - coder->MethodID = 0; - for (j = 0; j < idSize; j++) - coder->MethodID |= (UInt64)longID[idSize - 1 - j] << (8 * j); - - if ((mainByte & 0x10) != 0) { - RINOK(SzReadNumber32(sd, &coder->NumInStreams)); - RINOK(SzReadNumber32(sd, &coder->NumOutStreams)); - if (coder->NumInStreams > NUM_CODER_STREAMS_MAX || coder->NumOutStreams > NUM_CODER_STREAMS_MAX) - return SZ_ERROR_UNSUPPORTED; - } else { - coder->NumInStreams = 1; - coder->NumOutStreams = 1; - } - if ((mainByte & 0x20) != 0) { - UInt64 propertiesSize = 0; - RINOK(SzReadNumber(sd, &propertiesSize)); - if (!Buf_Create(&coder->Props, (size_t)propertiesSize, alloc)) - return SZ_ERROR_MEM; - RINOK(SzReadBytes(sd, coder->Props.data, (size_t)propertiesSize)); - } - } - while ((mainByte & 0x80) != 0) { - RINOK(SzReadByte(sd, &mainByte)); - RINOK(SzSkeepDataSize(sd, (mainByte & 0xF))); - if ((mainByte & 0x10) != 0) { - UInt32 n; - RINOK(SzReadNumber32(sd, &n)); - RINOK(SzReadNumber32(sd, &n)); - } - if ((mainByte & 0x20) != 0) { - UInt64 propertiesSize = 0; - RINOK(SzReadNumber(sd, &propertiesSize)); - RINOK(SzSkeepDataSize(sd, propertiesSize)); - } - } - numInStreams += coder->NumInStreams; - numOutStreams += coder->NumOutStreams; - } - - if (numOutStreams == 0) - return SZ_ERROR_UNSUPPORTED; - - folder->NumBindPairs = numBindPairs = numOutStreams - 1; - MY_ALLOC(CSzBindPair, folder->BindPairs, (size_t)numBindPairs, alloc); - - for (i = 0; i < numBindPairs; i++) { - CSzBindPair* bp = folder->BindPairs + i; - RINOK(SzReadNumber32(sd, &bp->InIndex)); - RINOK(SzReadNumber32(sd, &bp->OutIndex)); - } - - if (numInStreams < numBindPairs) - return SZ_ERROR_UNSUPPORTED; - - folder->NumPackStreams = numPackStreams = numInStreams - numBindPairs; - MY_ALLOC(UInt32, folder->PackStreams, (size_t)numPackStreams, alloc); - - if (numPackStreams == 1) { - for (i = 0; i < numInStreams; i++) - if (SzFolder_FindBindPairForInStream(folder, i) < 0) - break; - if (i == numInStreams) - return SZ_ERROR_UNSUPPORTED; - folder->PackStreams[0] = i; - } else - for (i = 0; i < numPackStreams; i++) { - RINOK(SzReadNumber32(sd, folder->PackStreams + i)); - } - return SZ_OK; -} - -static SRes SzReadUnpackInfo( - CSzData* sd, - UInt32* numFolders, - CSzFolder** folders, /* for alloc */ - ISzAlloc* alloc, - ISzAlloc* allocTemp) -{ - UInt32 i; - RINOK(SzWaitAttribute(sd, k7zIdFolder)); - RINOK(SzReadNumber32(sd, numFolders)); - { - RINOK(SzReadSwitch(sd)); - - MY_ALLOC(CSzFolder, *folders, (size_t)*numFolders, alloc); - - for (i = 0; i < *numFolders; i++) - SzFolder_Init((*folders) + i); - - for (i = 0; i < *numFolders; i++) { - RINOK(SzGetNextFolderItem(sd, (*folders) + i, alloc)); - } - } - - RINOK(SzWaitAttribute(sd, k7zIdCodersUnpackSize)); - - 
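/* The coder output (unpack) sizes read in the loop below, like most integers in the 7z
   header, use the variable-length SzReadNumber() encoding parsed earlier: the number of
   leading 1-bits in the first byte gives the count of extra little-endian bytes that
   follow, and the remaining low bits of the first byte supply the most significant part
   of the value. Worked example with illustrative bytes 8A FF:
     0x8A -> bit 7 set, bit 6 clear   => exactly one extra byte follows
     0xFF -> low 8 bits of the value
     (0x8A & 0x3F) << 8 = 0x0A00      => decoded value = 0x0AFF
   A first byte in 0x00..0x7F encodes itself; a first byte of 0xFF is followed by a full
   8-byte little-endian UInt64. */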
for (i = 0; i < *numFolders; i++) { - UInt32 j; - CSzFolder* folder = (*folders) + i; - UInt32 numOutStreams = SzFolder_GetNumOutStreams(folder); - - MY_ALLOC(UInt64, folder->UnpackSizes, (size_t)numOutStreams, alloc); - - for (j = 0; j < numOutStreams; j++) { - RINOK(SzReadNumber(sd, folder->UnpackSizes + j)); - } - } - - for (;;) { - UInt64 type; - RINOK(SzReadID(sd, &type)); - if (type == k7zIdEnd) - return SZ_OK; - if (type == k7zIdCRC) { - SRes res; - Byte* crcsDefined = 0; - UInt32* crcs = 0; - res = SzReadHashDigests(sd, *numFolders, &crcsDefined, &crcs, allocTemp); - if (res == SZ_OK) { - for (i = 0; i < *numFolders; i++) { - CSzFolder* folder = (*folders) + i; - folder->UnpackCRCDefined = crcsDefined[i]; - folder->UnpackCRC = crcs[i]; - } - } - IAlloc_Free(allocTemp, crcs); - IAlloc_Free(allocTemp, crcsDefined); - RINOK(res); - continue; - } - RINOK(SzSkeepData(sd)); - } -} - -static SRes SzReadSubStreamsInfo( - CSzData* sd, - UInt32 numFolders, - CSzFolder* folders, - UInt32* numUnpackStreams, - UInt64** unpackSizes, - Byte** digestsDefined, - UInt32** digests, - ISzAlloc* allocTemp) -{ - UInt64 type = 0; - UInt32 i; - UInt32 si = 0; - UInt32 numDigests = 0; - - for (i = 0; i < numFolders; i++) - folders[i].NumUnpackStreams = 1; - *numUnpackStreams = numFolders; - - for (;;) { - RINOK(SzReadID(sd, &type)); - if (type == k7zIdNumUnpackStream) { - *numUnpackStreams = 0; - for (i = 0; i < numFolders; i++) { - UInt32 numStreams; - RINOK(SzReadNumber32(sd, &numStreams)); - folders[i].NumUnpackStreams = numStreams; - *numUnpackStreams += numStreams; - } - continue; - } - if (type == k7zIdCRC || type == k7zIdSize) - break; - if (type == k7zIdEnd) - break; - RINOK(SzSkeepData(sd)); - } - - if (*numUnpackStreams == 0) { - *unpackSizes = 0; - *digestsDefined = 0; - *digests = 0; - } else { - *unpackSizes = (UInt64*)IAlloc_Alloc(allocTemp, (size_t)*numUnpackStreams * sizeof(UInt64)); - RINOM(*unpackSizes); - *digestsDefined = (Byte*)IAlloc_Alloc(allocTemp, (size_t)*numUnpackStreams * sizeof(Byte)); - RINOM(*digestsDefined); - *digests = (UInt32*)IAlloc_Alloc(allocTemp, (size_t)*numUnpackStreams * sizeof(UInt32)); - RINOM(*digests); - } - - for (i = 0; i < numFolders; i++) { - /* - v3.13 incorrectly worked with empty folders - v4.07: we check that folder is empty - */ - UInt64 sum = 0; - UInt32 j; - UInt32 numSubstreams = folders[i].NumUnpackStreams; - if (numSubstreams == 0) - continue; - if (type == k7zIdSize) - for (j = 1; j < numSubstreams; j++) { - UInt64 size; - RINOK(SzReadNumber(sd, &size)); - (*unpackSizes)[si++] = size; - sum += size; - } - (*unpackSizes)[si++] = SzFolder_GetUnpackSize(folders + i) - sum; - } - if (type == k7zIdSize) { - RINOK(SzReadID(sd, &type)); - } - - for (i = 0; i < *numUnpackStreams; i++) { - (*digestsDefined)[i] = 0; - (*digests)[i] = 0; - } - - for (i = 0; i < numFolders; i++) { - UInt32 numSubstreams = folders[i].NumUnpackStreams; - if (numSubstreams != 1 || !folders[i].UnpackCRCDefined) - numDigests += numSubstreams; - } - - si = 0; - for (;;) { - if (type == k7zIdCRC) { - int digestIndex = 0; - Byte* digestsDefined2 = 0; - UInt32* digests2 = 0; - SRes res = SzReadHashDigests(sd, numDigests, &digestsDefined2, &digests2, allocTemp); - if (res == SZ_OK) { - for (i = 0; i < numFolders; i++) { - CSzFolder* folder = folders + i; - UInt32 numSubstreams = folder->NumUnpackStreams; - if (numSubstreams == 1 && folder->UnpackCRCDefined) { - (*digestsDefined)[si] = 1; - (*digests)[si] = folder->UnpackCRC; - si++; - } else { - UInt32 j; - for (j = 0; j < 
numSubstreams; j++, digestIndex++) { - (*digestsDefined)[si] = digestsDefined2[digestIndex]; - (*digests)[si] = digests2[digestIndex]; - si++; - } - } - } - } - IAlloc_Free(allocTemp, digestsDefined2); - IAlloc_Free(allocTemp, digests2); - RINOK(res); - } else if (type == k7zIdEnd) - return SZ_OK; - else { - RINOK(SzSkeepData(sd)); - } - RINOK(SzReadID(sd, &type)); - } -} - -static SRes SzReadStreamsInfo( - CSzData* sd, - UInt64* dataOffset, - CSzAr* p, - UInt32* numUnpackStreams, - UInt64** unpackSizes, /* allocTemp */ - Byte** digestsDefined, /* allocTemp */ - UInt32** digests, /* allocTemp */ - ISzAlloc* alloc, - ISzAlloc* allocTemp) -{ - for (;;) { - UInt64 type; - RINOK(SzReadID(sd, &type)); - if ((UInt64)(int)type != type) - return SZ_ERROR_UNSUPPORTED; - switch ((int)type) { - case k7zIdEnd: - return SZ_OK; - case k7zIdPackInfo: { - RINOK(SzReadPackInfo(sd, dataOffset, &p->NumPackStreams, - &p->PackSizes, &p->PackCRCsDefined, &p->PackCRCs, alloc)); - break; - } - case k7zIdUnpackInfo: { - RINOK(SzReadUnpackInfo(sd, &p->NumFolders, &p->Folders, alloc, allocTemp)); - break; - } - case k7zIdSubStreamsInfo: { - RINOK(SzReadSubStreamsInfo(sd, p->NumFolders, p->Folders, - numUnpackStreams, unpackSizes, digestsDefined, digests, allocTemp)); - break; - } - default: - return SZ_ERROR_UNSUPPORTED; - } - } -} - -size_t SzArEx_GetFileNameUtf16(const CSzArEx* p, size_t fileIndex, UInt16* dest) -{ - size_t len = p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex]; - if (dest != 0) { - size_t i; - const Byte* src = p->FileNames.data + (p->FileNameOffsets[fileIndex] * 2); - for (i = 0; i < len; i++) - dest[i] = GetUi16(src + i * 2); - } - return len; -} - -static SRes SzReadFileNames(const Byte* p, size_t size, UInt32 numFiles, size_t* sizes) -{ - UInt32 i; - size_t pos = 0; - for (i = 0; i < numFiles; i++) { - sizes[i] = pos; - for (;;) { - if (pos >= size) - return SZ_ERROR_ARCHIVE; - if (p[pos * 2] == 0 && p[pos * 2 + 1] == 0) - break; - pos++; - } - pos++; - } - sizes[i] = pos; - return (pos == size) ? 
SZ_OK : SZ_ERROR_ARCHIVE; -} - -static SRes SzReadHeader2( - CSzArEx* p, /* allocMain */ - CSzData* sd, - UInt64** unpackSizes, /* allocTemp */ - Byte** digestsDefined, /* allocTemp */ - UInt32** digests, /* allocTemp */ - Byte** emptyStreamVector, /* allocTemp */ - Byte** emptyFileVector, /* allocTemp */ - Byte** lwtVector, /* allocTemp */ - ISzAlloc* allocMain, - ISzAlloc* allocTemp) -{ - UInt64 type; - UInt32 numUnpackStreams = 0; - UInt32 numFiles = 0; - CSzFileItem* files = 0; - UInt32 numEmptyStreams = 0; - UInt32 i; - - RINOK(SzReadID(sd, &type)); - - if (type == k7zIdArchiveProperties) { - RINOK(SzReadArchiveProperties(sd)); - RINOK(SzReadID(sd, &type)); - } - - if (type == k7zIdMainStreamsInfo) { - RINOK(SzReadStreamsInfo(sd, - &p->dataPos, - &p->db, - &numUnpackStreams, - unpackSizes, - digestsDefined, - digests, allocMain, allocTemp)); - p->dataPos += p->startPosAfterHeader; - RINOK(SzReadID(sd, &type)); - } - - if (type == k7zIdEnd) - return SZ_OK; - if (type != k7zIdFilesInfo) - return SZ_ERROR_ARCHIVE; - - RINOK(SzReadNumber32(sd, &numFiles)); - p->db.NumFiles = numFiles; - - MY_ALLOC(CSzFileItem, files, (size_t)numFiles, allocMain); - - p->db.Files = files; - for (i = 0; i < numFiles; i++) - SzFile_Init(files + i); - - for (;;) { - UInt64 _type; - UInt64 size; - RINOK(SzReadID(sd, &_type)); - if (type == k7zIdEnd) - break; - RINOK(SzReadNumber(sd, &size)); - if (size > sd->Size) - return SZ_ERROR_ARCHIVE; - if ((UInt64)(int)_type != _type) { - RINOK(SzSkeepDataSize(sd, size)); - } else - switch ((int)_type) { - case k7zIdName: { - size_t namesSize; - RINOK(SzReadSwitch(sd)); - namesSize = (size_t)size - 1; - if ((namesSize & 1) != 0) - return SZ_ERROR_ARCHIVE; - if (!Buf_Create(&p->FileNames, namesSize, allocMain)) - return SZ_ERROR_MEM; - MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain); - memcpy(p->FileNames.data, sd->Data, namesSize); - RINOK(SzReadFileNames(sd->Data, namesSize >> 1, numFiles, p->FileNameOffsets)) - RINOK(SzSkeepDataSize(sd, namesSize)); - break; - } - case k7zIdEmptyStream: { - RINOK(SzReadBoolVector(sd, numFiles, emptyStreamVector, allocTemp)); - numEmptyStreams = 0; - for (i = 0; i < numFiles; i++) - if ((*emptyStreamVector)[i]) - numEmptyStreams++; - break; - } - case k7zIdEmptyFile: { - RINOK(SzReadBoolVector(sd, numEmptyStreams, emptyFileVector, allocTemp)); - break; - } - case k7zIdWinAttributes: { - RINOK(SzReadBoolVector2(sd, numFiles, lwtVector, allocTemp)); - RINOK(SzReadSwitch(sd)); - for (i = 0; i < numFiles; i++) { - CSzFileItem* f = &files[i]; - Byte defined = (*lwtVector)[i]; - f->AttribDefined = defined; - f->Attrib = 0; - if (defined) { - RINOK(SzReadUInt32(sd, &f->Attrib)); - } - } - IAlloc_Free(allocTemp, *lwtVector); - *lwtVector = NULL; - break; - } - case k7zIdMTime: { - RINOK(SzReadBoolVector2(sd, numFiles, lwtVector, allocTemp)); - RINOK(SzReadSwitch(sd)); - for (i = 0; i < numFiles; i++) { - CSzFileItem* f = &files[i]; - Byte defined = (*lwtVector)[i]; - f->MTimeDefined = defined; - f->MTime.Low = f->MTime.High = 0; - if (defined) { - RINOK(SzReadUInt32(sd, &f->MTime.Low)); - RINOK(SzReadUInt32(sd, &f->MTime.High)); - } - } - IAlloc_Free(allocTemp, *lwtVector); - *lwtVector = NULL; - break; - } - default: { - RINOK(SzSkeepDataSize(sd, size)); - } - } - } - - { - UInt32 emptyFileIndex = 0; - UInt32 sizeIndex = 0; - for (i = 0; i < numFiles; i++) { - CSzFileItem* file = files + i; - file->IsAnti = 0; - if (*emptyStreamVector == 0) - file->HasStream = 1; - else - file->HasStream = (Byte)((*emptyStreamVector)[i] ? 
0 : 1); - if (file->HasStream) { - file->IsDir = 0; - file->Size = (*unpackSizes)[sizeIndex]; - file->Crc = (*digests)[sizeIndex]; - file->CrcDefined = (Byte)(*digestsDefined)[sizeIndex]; - sizeIndex++; - } else { - if (*emptyFileVector == 0) - file->IsDir = 1; - else - file->IsDir = (Byte)((*emptyFileVector)[emptyFileIndex] ? 0 : 1); - emptyFileIndex++; - file->Size = 0; - file->Crc = 0; - file->CrcDefined = 0; - } - } - } - return SzArEx_Fill(p, allocMain); -} - -static SRes SzReadHeader( - CSzArEx* p, - CSzData* sd, - ISzAlloc* allocMain, - ISzAlloc* allocTemp) -{ - UInt64* unpackSizes = 0; - Byte* digestsDefined = 0; - UInt32* digests = 0; - Byte* emptyStreamVector = 0; - Byte* emptyFileVector = 0; - Byte* lwtVector = 0; - SRes res = SzReadHeader2(p, sd, - &unpackSizes, &digestsDefined, &digests, - &emptyStreamVector, &emptyFileVector, &lwtVector, - allocMain, allocTemp); - IAlloc_Free(allocTemp, unpackSizes); - IAlloc_Free(allocTemp, digestsDefined); - IAlloc_Free(allocTemp, digests); - IAlloc_Free(allocTemp, emptyStreamVector); - IAlloc_Free(allocTemp, emptyFileVector); - IAlloc_Free(allocTemp, lwtVector); - return res; -} - -static SRes SzReadAndDecodePackedStreams2( - ILookInStream* inStream, - CSzData* sd, - CBuf* outBuffer, - UInt64 baseOffset, - CSzAr* p, - UInt64** unpackSizes, - Byte** digestsDefined, - UInt32** digests, - ISzAlloc* allocTemp) -{ - - UInt32 numUnpackStreams = 0; - UInt64 dataStartPos; - CSzFolder* folder; - UInt64 unpackSize; - SRes res; - - RINOK(SzReadStreamsInfo(sd, &dataStartPos, p, - &numUnpackStreams, unpackSizes, digestsDefined, digests, - allocTemp, allocTemp)); - - dataStartPos += baseOffset; - if (p->NumFolders != 1) - return SZ_ERROR_ARCHIVE; - - folder = p->Folders; - unpackSize = SzFolder_GetUnpackSize(folder); - - RINOK(LookInStream_SeekTo(inStream, dataStartPos)); - - if (!Buf_Create(outBuffer, (size_t)unpackSize, allocTemp)) - return SZ_ERROR_MEM; - - res = SzFolder_Decode(folder, p->PackSizes, - inStream, dataStartPos, - outBuffer->data, (size_t)unpackSize, allocTemp); - RINOK(res); - if (folder->UnpackCRCDefined) - if (CrcCalc(outBuffer->data, (size_t)unpackSize) != folder->UnpackCRC) - return SZ_ERROR_CRC; - return SZ_OK; -} - -static SRes SzReadAndDecodePackedStreams( - ILookInStream* inStream, - CSzData* sd, - CBuf* outBuffer, - UInt64 baseOffset, - ISzAlloc* allocTemp) -{ - CSzAr p; - UInt64* unpackSizes = 0; - Byte* digestsDefined = 0; - UInt32* digests = 0; - SRes res; - SzAr_Init(&p); - res = SzReadAndDecodePackedStreams2(inStream, sd, outBuffer, baseOffset, - &p, &unpackSizes, &digestsDefined, &digests, - allocTemp); - SzAr_Free(&p, allocTemp); - IAlloc_Free(allocTemp, unpackSizes); - IAlloc_Free(allocTemp, digestsDefined); - IAlloc_Free(allocTemp, digests); - return res; -} - -static SRes SzArEx_Open2( - CSzArEx* p, - ILookInStream* inStream, - ISzAlloc* allocMain, - ISzAlloc* allocTemp) -{ - Byte header[k7zStartHeaderSize]; - Int64 startArcPos; - UInt64 nextHeaderOffset, nextHeaderSize; - size_t nextHeaderSizeT; - UInt32 nextHeaderCRC; - CBuf buffer; - SRes res; - - startArcPos = 0; - RINOK(inStream->Seek(inStream, &startArcPos, SZ_SEEK_CUR)); - - RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE)); - - if (!TestSignatureCandidate(header)) - return SZ_ERROR_NO_ARCHIVE; - if (header[6] != k7zMajorVersion) - return SZ_ERROR_UNSUPPORTED; - - nextHeaderOffset = GetUi64(header + 12); - nextHeaderSize = GetUi64(header + 20); - nextHeaderCRC = GetUi32(header + 28); - - p->startPosAfterHeader = startArcPos + 
k7zStartHeaderSize; - - if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) - return SZ_ERROR_CRC; - - nextHeaderSizeT = (size_t)nextHeaderSize; - if (nextHeaderSizeT != nextHeaderSize) - return SZ_ERROR_MEM; - if (nextHeaderSizeT == 0) - return SZ_OK; - if (nextHeaderOffset > nextHeaderOffset + nextHeaderSize || nextHeaderOffset > nextHeaderOffset + nextHeaderSize + k7zStartHeaderSize) - return SZ_ERROR_NO_ARCHIVE; - - { - Int64 pos = 0; - RINOK(inStream->Seek(inStream, &pos, SZ_SEEK_END)); - if ((UInt64)pos < startArcPos + nextHeaderOffset || (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset || (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) - return SZ_ERROR_INPUT_EOF; - } - - RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset)); - - if (!Buf_Create(&buffer, nextHeaderSizeT, allocTemp)) - return SZ_ERROR_MEM; - - res = LookInStream_Read(inStream, buffer.data, nextHeaderSizeT); - if (res == SZ_OK) { - res = SZ_ERROR_ARCHIVE; - if (CrcCalc(buffer.data, nextHeaderSizeT) == nextHeaderCRC) { - CSzData sd; - UInt64 type; - sd.Data = buffer.data; - sd.Size = buffer.size; - res = SzReadID(&sd, &type); - if (res == SZ_OK) { - if (type == k7zIdEncodedHeader) { - CBuf outBuffer; - Buf_Init(&outBuffer); - res = SzReadAndDecodePackedStreams(inStream, &sd, &outBuffer, p->startPosAfterHeader, allocTemp); - if (res != SZ_OK) - Buf_Free(&outBuffer, allocTemp); - else { - Buf_Free(&buffer, allocTemp); - buffer.data = outBuffer.data; - buffer.size = outBuffer.size; - sd.Data = buffer.data; - sd.Size = buffer.size; - res = SzReadID(&sd, &type); - } - } - } - if (res == SZ_OK) { - if (type == k7zIdHeader) - res = SzReadHeader(p, &sd, allocMain, allocTemp); - else - res = SZ_ERROR_UNSUPPORTED; - } - } - } - Buf_Free(&buffer, allocTemp); - return res; -} - -SRes SzArEx_Open(CSzArEx* p, ILookInStream* inStream, ISzAlloc* allocMain, ISzAlloc* allocTemp) -{ - SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp); - if (res != SZ_OK) - SzArEx_Free(p, allocMain); - return res; -} - -SRes SzArEx_Extract( - const CSzArEx* p, - ILookInStream* inStream, - UInt32 fileIndex, - UInt32* blockIndex, - Byte** outBuffer, - size_t* outBufferSize, - size_t* offset, - size_t* outSizeProcessed, - ISzAlloc* allocMain, - ISzAlloc* allocTemp) -{ - UInt32 folderIndex = p->FileIndexToFolderIndexMap[fileIndex]; - SRes res = SZ_OK; - *offset = 0; - *outSizeProcessed = 0; - if (folderIndex == (UInt32)-1) { - IAlloc_Free(allocMain, *outBuffer); - *blockIndex = folderIndex; - *outBuffer = 0; - *outBufferSize = 0; - return SZ_OK; - } - - if (*outBuffer == 0 || *blockIndex != folderIndex) { - CSzFolder* folder = p->db.Folders + folderIndex; - UInt64 unpackSizeSpec = SzFolder_GetUnpackSize(folder); - size_t unpackSize = (size_t)unpackSizeSpec; - UInt64 startOffset = SzArEx_GetFolderStreamPos(p, folderIndex, 0); - - if (unpackSize != unpackSizeSpec) - return SZ_ERROR_MEM; - *blockIndex = folderIndex; - IAlloc_Free(allocMain, *outBuffer); - *outBuffer = 0; - - RINOK(LookInStream_SeekTo(inStream, startOffset)); - - if (res == SZ_OK) { - *outBufferSize = unpackSize; - if (unpackSize != 0) { - *outBuffer = (Byte*)IAlloc_Alloc(allocMain, unpackSize); - if (*outBuffer == 0) - res = SZ_ERROR_MEM; - } - if (res == SZ_OK) { - res = SzFolder_Decode(folder, - p->db.PackSizes + p->FolderStartPackStreamIndex[folderIndex], - inStream, startOffset, - *outBuffer, unpackSize, allocTemp); - if (res == SZ_OK) { - if (folder->UnpackCRCDefined) { - if 
(CrcCalc(*outBuffer, unpackSize) != folder->UnpackCRC) - res = SZ_ERROR_CRC; - } - } - } - } - } - if (res == SZ_OK) { - UInt32 i; - CSzFileItem* fileItem = p->db.Files + fileIndex; - *offset = 0; - for (i = p->FolderStartFileIndex[folderIndex]; i < fileIndex; i++) - *offset += (UInt32)p->db.Files[i].Size; - *outSizeProcessed = (size_t)fileItem->Size; - if (*offset + *outSizeProcessed > *outBufferSize) - return SZ_ERROR_FAIL; - if (fileItem->CrcDefined && CrcCalc(*outBuffer + *offset, *outSizeProcessed) != fileItem->Crc) - res = SZ_ERROR_CRC; - } - return res; -} diff --git a/src/core/fex/7z_C/7zStream.c b/src/core/fex/7z_C/7zStream.c index 79ca635c..92f821e2 100644 --- a/src/core/fex/7z_C/7zStream.c +++ b/src/core/fex/7z_C/7zStream.c @@ -1,163 +1,199 @@ -/* 7zStream.c -- 7z Stream functions -2010-03-11 : Igor Pavlov : Public domain */ - -#include - -#include "Types.h" - -SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType) -{ - while (size != 0) { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void*)((Byte*)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size) -{ - return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf) -{ - size_t processed = 1; - RINOK(stream->Read(stream, buf, &processed)); - return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF; -} - -SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset) -{ - Int64 t = offset; - return stream->Seek(stream, &t, SZ_SEEK_SET); -} - -SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size) -{ - const void* lookBuf; - if (*size == 0) - return SZ_OK; - RINOK(stream->Look(stream, &lookBuf, size)); - memcpy(buf, lookBuf, *size); - return stream->Skip(stream, *size); -} - -SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType) -{ - while (size != 0) { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void*)((Byte*)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size) -{ - return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -static SRes LookToRead_Look_Lookahead(void* pp, const void** buf, size_t* size) -{ - SRes res = SZ_OK; - CLookToRead* p = (CLookToRead*)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) { - p->pos = 0; - size2 = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, &size2); - p->size = size2; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Look_Exact(void* pp, const void** buf, size_t* size) -{ - SRes res = SZ_OK; - CLookToRead* p = (CLookToRead*)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) { - p->pos = 0; - if (*size > LookToRead_BUF_SIZE) - *size = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, size); - size2 = p->size = *size; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Skip(void* pp, size_t offset) -{ - CLookToRead* p = (CLookToRead*)pp; - p->pos += offset; - return SZ_OK; -} - -static SRes LookToRead_Read(void* pp, void* buf, size_t* size) -{ - CLookToRead* p = (CLookToRead*)pp; - size_t rem = p->size - p->pos; - if 
(rem == 0) - return p->realStream->Read(p->realStream, buf, size); - if (rem > *size) - rem = *size; - memcpy(buf, p->buf + p->pos, rem); - p->pos += rem; - *size = rem; - return SZ_OK; -} - -static SRes LookToRead_Seek(void* pp, Int64* pos, ESzSeek origin) -{ - CLookToRead* p = (CLookToRead*)pp; - p->pos = p->size = 0; - return p->realStream->Seek(p->realStream, pos, origin); -} - -void LookToRead_CreateVTable(CLookToRead* p, int lookahead) -{ - p->s.Look = lookahead ? LookToRead_Look_Lookahead : LookToRead_Look_Exact; - p->s.Skip = LookToRead_Skip; - p->s.Read = LookToRead_Read; - p->s.Seek = LookToRead_Seek; -} - -void LookToRead_Init(CLookToRead* p) -{ - p->pos = p->size = 0; -} - -static SRes SecToLook_Read(void* pp, void* buf, size_t* size) -{ - CSecToLook* p = (CSecToLook*)pp; - return LookInStream_LookRead(p->realStream, buf, size); -} - -void SecToLook_CreateVTable(CSecToLook* p) -{ - p->s.Read = SecToLook_Read; -} - -static SRes SecToRead_Read(void* pp, void* buf, size_t* size) -{ - CSecToRead* p = (CSecToRead*)pp; - return p->realStream->Read(p->realStream, buf, size); -} - -void SecToRead_CreateVTable(CSecToRead* p) -{ - p->s.Read = SecToRead_Read; -} +/* 7zStream.c -- 7z Stream functions +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zTypes.h" + + +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize) +{ + size_t size = *processedSize; + *processedSize = 0; + while (size != 0) + { + size_t cur = size; + const SRes res = ISeqInStream_Read(stream, buf, &cur); + *processedSize += cur; + buf = (void *)((Byte *)buf + cur); + size -= cur; + if (res != SZ_OK) + return res; + if (cur == 0) + return SZ_OK; + } + return SZ_OK; +} + +/* +SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ISeqInStream_Read(stream, buf, &processed)) + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size) +{ + return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} +*/ + + +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf) +{ + size_t processed = 1; + RINOK(ISeqInStream_Read(stream, buf, &processed)) + return (processed == 1) ? 
SZ_OK : SZ_ERROR_INPUT_EOF; +} + + + +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset) +{ + Int64 t = (Int64)offset; + return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); +} + +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size) +{ + const void *lookBuf; + if (*size == 0) + return SZ_OK; + RINOK(ILookInStream_Look(stream, &lookBuf, size)) + memcpy(buf, lookBuf, *size); + return ILookInStream_Skip(stream, *size); +} + +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ILookInStream_Read(stream, buf, &processed)) + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size) +{ + return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + + + +#define GET_LookToRead2 Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLookToRead2) + +static SRes LookToRead2_Look_Lookahead(ILookInStreamPtr pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + size2 = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, &size2); + p->size = size2; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Look_Exact(ILookInStreamPtr pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + if (*size > p->bufSize) + *size = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, size); + size2 = p->size = *size; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Skip(ILookInStreamPtr pp, size_t offset) +{ + GET_LookToRead2 + p->pos += offset; + return SZ_OK; +} + +static SRes LookToRead2_Read(ILookInStreamPtr pp, void *buf, size_t *size) +{ + GET_LookToRead2 + size_t rem = p->size - p->pos; + if (rem == 0) + return ISeekInStream_Read(p->realStream, buf, size); + if (rem > *size) + rem = *size; + memcpy(buf, p->buf + p->pos, rem); + p->pos += rem; + *size = rem; + return SZ_OK; +} + +static SRes LookToRead2_Seek(ILookInStreamPtr pp, Int64 *pos, ESzSeek origin) +{ + GET_LookToRead2 + p->pos = p->size = 0; + return ISeekInStream_Seek(p->realStream, pos, origin); +} + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead) +{ + p->vt.Look = lookahead ? 
+ LookToRead2_Look_Lookahead : + LookToRead2_Look_Exact; + p->vt.Skip = LookToRead2_Skip; + p->vt.Read = LookToRead2_Read; + p->vt.Seek = LookToRead2_Seek; +} + + + +static SRes SecToLook_Read(ISeqInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToLook) + return LookInStream_LookRead(p->realStream, buf, size); +} + +void SecToLook_CreateVTable(CSecToLook *p) +{ + p->vt.Read = SecToLook_Read; +} + +static SRes SecToRead_Read(ISeqInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToRead) + return ILookInStream_Read(p->realStream, buf, size); +} + +void SecToRead_CreateVTable(CSecToRead *p) +{ + p->vt.Read = SecToRead_Read; +} diff --git a/src/core/fex/7z_C/7zTypes.h b/src/core/fex/7z_C/7zTypes.h new file mode 100644 index 00000000..5d3c7483 --- /dev/null +++ b/src/core/fex/7z_C/7zTypes.h @@ -0,0 +1,597 @@ +/* 7zTypes.h -- Basic types +2024-01-24 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_7Z_TYPES_H +#define ZIP7_7Z_TYPES_H + +#ifdef _WIN32 +/* #include */ +#else +#include +#endif + +#include + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + + +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + /* + // C11/C++11: + #include + #define MY_ALIGN(n) alignas(n) + */ + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + +#ifdef _WIN32 + +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + +// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) + +#else // _WIN32 + +// #define ENV_HAVE_LSTAT +typedef int WRes; + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY_FACILITY_ERRNO 0x800 +#define MY_FACILITY_WIN32 7 +#define MY_FACILITY_WRes MY_FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY_FACILITY_WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? 
((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INTERNAL_ERROR 1359L +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_PARAMETER EINVAL +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. +#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY_FACILITY_WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ + +#endif + + +#ifndef RINOK +#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; } +#endif + +#ifndef RINOK_WRes +#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef Z7_DECL_Int32_AS_long +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long 
LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + +#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL) + + +#ifdef Z7_DECL_Int64_AS_long + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#else +#if defined(__clang__) || defined(__GNUC__) +#include +typedef int64_t Int64; +typedef uint64_t UInt64; +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +// #define UINT64_CONST(n) n ## ULL +#endif +#endif + +#endif + +#define UINT64_CONST(n) n + + +#ifdef Z7_DECL_SizeT_AS_unsigned_int +typedef unsigned int SizeT; +#else +typedef size_t SizeT; +#endif + +/* +#if (defined(_MSC_VER) && _MSC_VER <= 1200) +typedef size_t MY_uintptr_t; +#else +#include +typedef uintptr_t MY_uintptr_t; +#endif +*/ + +typedef int BoolInt; +/* typedef BoolInt Bool; */ +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define Z7_STDCALL __stdcall +#else +#define Z7_STDCALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define Z7_NO_INLINE __declspec(noinline) +#else +#define Z7_NO_INLINE +#endif + +#define Z7_FORCE_INLINE __forceinline + +#define Z7_CDECL __cdecl +#define Z7_FASTCALL __fastcall + +#else // _MSC_VER + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) +#define Z7_NO_INLINE __attribute__((noinline)) +#define Z7_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define Z7_NO_INLINE +#define Z7_FORCE_INLINE +#endif + +#define Z7_CDECL + +#if defined(_M_IX86) \ + || defined(__i386__) +// #define Z7_FASTCALL __attribute__((fastcall)) +// #define Z7_FASTCALL __attribute__((cdecl)) +#define Z7_FASTCALL +#elif defined(MY_CPU_AMD64) +// #define Z7_FASTCALL __attribute__((ms_abi)) +#define Z7_FASTCALL +#else +#define Z7_FASTCALL +#endif + +#endif // _MSC_VER + + +/* The following interfaces use first parameter as pointer to structure */ + +// #define Z7_C_IFACE_CONST_QUAL +#define Z7_C_IFACE_CONST_QUAL const + +#define Z7_C_IFACE_DECL(a) \ + struct a ## _; \ + typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \ + typedef struct a ## _ a; \ + struct a ## _ + + +Z7_C_IFACE_DECL (IByteIn) +{ + Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) + + +Z7_C_IFACE_DECL (IByteOut) +{ + void (*Write)(IByteOutPtr p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) + + +Z7_C_IFACE_DECL (ISeqInStream) +{ + SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) + +/* try to read as much as avail in stream and limited by (*processedSize) */ +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize); +/* it can return SZ_ERROR_INPUT_EOF */ +// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size); +// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf); + + +Z7_C_IFACE_DECL (ISeqOutStream) +{ + size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. 
+ (result < size) means error */ +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + + +Z7_C_IFACE_DECL (ISeekInStream) +{ + SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +Z7_C_IFACE_DECL (ILookInStream) +{ + SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(ILookInStreamPtr p, size_t offset); + /* offset must be <= output(*size) of Look */ + SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin); +}; + +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size); + + +typedef struct +{ + ILookInStream vt; + ISeekInStreamPtr realStream; + + size_t pos; + size_t size; /* it's data size */ + + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; } + + +typedef struct +{ + ISeqInStream vt; + ILookInStreamPtr realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + + + +typedef struct +{ + ISeqInStream vt; + ILookInStreamPtr realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + + +Z7_C_IFACE_DECL (ICompressProgress) +{ + SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. 
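/* A usage sketch (not SDK code) of the seek/look stream plumbing declared here: a
   caller-defined ISeekInStream over a memory buffer is wrapped in CLookToRead2 to obtain
   the buffered ILookInStream that the 7z reading code consumes. The CMemSeekStream type
   and MemSeekStream_* names are illustrative assumptions; Seek() writes back the
   resulting absolute position, the convention the open code relies on when it queries
   the current position with SZ_SEEK_CUR and offset 0. */

#include <string.h>

typedef struct
{
  ISeekInStream vt;   /* must be the first member so the vt-to-container macro works */
  const Byte *data;   /* filled by the caller before use */
  size_t size;
  size_t pos;
} CMemSeekStream;

static SRes MemSeekStream_Read(ISeekInStreamPtr pp, void *buf, size_t *size)
{
  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CMemSeekStream)
  size_t rem = p->size - p->pos;
  if (*size > rem)
    *size = rem;          /* (*size == 0) at end of data, as the interface allows */
  memcpy(buf, p->data + p->pos, *size);
  p->pos += *size;
  return SZ_OK;
}

static SRes MemSeekStream_Seek(ISeekInStreamPtr pp, Int64 *pos, ESzSeek origin)
{
  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CMemSeekStream)
  Int64 base = (origin == SZ_SEEK_SET) ? 0 :
               (origin == SZ_SEEK_CUR) ? (Int64)p->pos : (Int64)p->size;
  Int64 newPos = base + *pos;
  if (newPos < 0 || (UInt64)newPos > p->size)
    return SZ_ERROR_PARAM;
  p->pos = (size_t)newPos;
  *pos = newPos;          /* report the resulting absolute position */
  return SZ_OK;
}

/* Wiring it up; the CLookToRead2 work buffer is supplied by the caller: */
static void MemSeekStream_InitLook(CMemSeekStream *s, CLookToRead2 *look,
    Byte *workBuf, size_t workBufSize)
{
  s->vt.Read = MemSeekStream_Read;
  s->vt.Seek = MemSeekStream_Seek;
  s->pos = 0;
  LookToRead2_CreateVTable(look, 0 /* exact reads, no lookahead */);
  look->realStream = &s->vt;
  look->buf = workBuf;
  look->bufSize = workBufSize;
  LookToRead2_INIT(look)
  /* &look->vt is then the ILookInStreamPtr handed to the 7z reading code */
}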
*/ +}; + +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) + + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc +{ + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef Z7_container_of + +/* +#define Z7_container_of(ptr, type, m) container_of(ptr, type, m) +#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define Z7_container_of(ptr, type, m) \ + ((type *)(void *)((char *)(void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +#define Z7_container_of_CONST(ptr, type, m) \ + ((const type *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +/* +#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \ + ((type *)(void *)(const void *)((const char *)(const void *) \ + (1 ? 
(ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) +*/ + +#endif + +#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) + +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) +*/ +#if defined (__clang__) || defined(__GNUC__) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL +#endif + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ + Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ + type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ + Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) + + +// #define ZIP7_DECLARE_HANDLE(name) typedef void *name; +#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name; + + +#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifndef Z7_ARRAY_SIZE +#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define WSTRING_PATH_SEPARATOR L"/" + +#endif + +#define k_PropVar_TimePrec_0 0 +#define k_PropVar_TimePrec_Unix 1 +#define k_PropVar_TimePrec_DOS 2 +#define k_PropVar_TimePrec_HighPrec 3 +#define k_PropVar_TimePrec_Base 16 +#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7) +#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9) + +EXTERN_C_END + +#endif + +/* +#ifndef Z7_ST +#ifdef _7ZIP_ST +#define Z7_ST +#endif +#endif +*/ diff --git a/src/core/fex/7z_C/Bcj2.c b/src/core/fex/7z_C/Bcj2.c index 945941ee..c562e47c 100644 --- a/src/core/fex/7z_C/Bcj2.c +++ b/src/core/fex/7z_C/Bcj2.c @@ -1,153 +1,290 @@ -/* Bcj2.c -- Converter for x86 code (BCJ2) -2008-10-04 : Igor Pavlov : Public domain */ - -#include "Bcj2.h" - -#ifdef _LZMA_PROB32 -#define CProb UInt32 -#else -#define CProb UInt16 -#endif - -#define IsJcc(b0, b1) ((b0) == 0x0F && ((b1)&0xF0) == 0x80) -#define IsJ(b0, b1) ((b1 & 0xFE) == 0xE8 || IsJcc(b0, b1)) - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_READ_BYTE (*buffer++) -#define RC_TEST \ - { \ - if (buffer == bufferLim) \ - return SZ_ERROR_DATA; \ - } -#define RC_INIT2 \ - code = 0; \ - range = 0xFFFFFFFF; \ - { \ - int i; \ - for (i = 0; i < 5; i++) { \ - RC_TEST; \ - code = (code << 8) | RC_READ_BYTE; \ - } \ - } - -#define NORMALIZE \ - if (range < kTopValue) { \ - RC_TEST; \ - range <<= 8; \ - code = (code << 8) | RC_READ_BYTE; \ - } - -#define IF_BIT_0(p) \ - ttt = *(p); \ - bound 
= (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0(p) \ - range = bound; \ - *(p) = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); \ - NORMALIZE; -#define UPDATE_1(p) \ - range -= bound; \ - code -= bound; \ - *(p) = (CProb)(ttt - (ttt >> kNumMoveBits)); \ - NORMALIZE; - -int Bcj2_Decode( - const Byte* buf0, SizeT size0, - const Byte* buf1, SizeT size1, - const Byte* buf2, SizeT size2, - const Byte* buf3, SizeT size3, - Byte* outBuf, SizeT outSize) -{ - CProb p[256 + 2]; - SizeT inPos = 0, outPos = 0; - - const Byte *buffer, *bufferLim; - UInt32 range, code; - Byte prevByte = 0; - - unsigned int _i; - for (_i = 0; _i < sizeof(p) / sizeof(p[0]); _i++) - p[_i] = kBitModelTotal >> 1; - - buffer = buf3; - bufferLim = buffer + size3; - RC_INIT2 - - if (outSize == 0) - return SZ_OK; - - for (;;) { - Byte b; - CProb* prob; - UInt32 bound; - UInt32 ttt; - - SizeT limit = size0 - inPos; - if (outSize - outPos < limit) - limit = outSize - outPos; - while (limit != 0) { - Byte _b = buf0[inPos]; - outBuf[outPos++] = _b; - if (IsJ(prevByte, _b)) - break; - inPos++; - prevByte = _b; - limit--; - } - - if (limit == 0 || outPos == outSize) - break; - - b = buf0[inPos++]; - - if (b == 0xE8) - prob = p + prevByte; - else if (b == 0xE9) - prob = p + 256; - else - prob = p + 257; - - IF_BIT_0(prob) - { - UPDATE_0(prob) - prevByte = b; - } - else - { - UInt32 dest; - const Byte* v; - UPDATE_1(prob) - if (b == 0xE8) { - v = buf1; - if (size1 < 4) - return SZ_ERROR_DATA; - buf1 += 4; - size1 -= 4; - } else { - v = buf2; - if (size2 < 4) - return SZ_ERROR_DATA; - buf2 += 4; - size2 -= 4; - } - dest = (((UInt32)v[0] << 24) | ((UInt32)v[1] << 16) | ((UInt32)v[2] << 8) | ((UInt32)v[3])) - ((UInt32)outPos + 4); - outBuf[outPos++] = (Byte)dest; - if (outPos == outSize) - break; - outBuf[outPos++] = (Byte)(dest >> 8); - if (outPos == outSize) - break; - outBuf[outPos++] = (Byte)(dest >> 16); - if (outPos == outSize) - break; - outBuf[outPos++] = prevByte = (Byte)(dest >> 24); - } - } - return (outPos == outSize) ? 
SZ_OK : SZ_ERROR_DATA; -} +/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) +2023-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bcj2.h" +#include "CpuArch.h" + +#define kTopValue ((UInt32)1 << 24) +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +// UInt32 bcj2_stats[256 + 2][2]; + +void Bcj2Dec_Init(CBcj2Dec *p) +{ + unsigned i; + p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK; + p->ip = 0; + p->temp = 0; + p->range = 0; + p->code = 0; + for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) + p->probs[i] = kBitModelTotal >> 1; +} + +SRes Bcj2Dec_Decode(CBcj2Dec *p) +{ + UInt32 v = p->temp; + // const Byte *src; + if (p->range <= 5) + { + UInt32 code = p->code; + p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */ + for (; p->range != 5; p->range++) + { + if (p->range == 1 && code != 0) + return SZ_ERROR_DATA; + if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) + { + p->state = BCJ2_STREAM_RC; + return SZ_OK; + } + code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + p->code = code; + } + if (code == 0xffffffff) + return SZ_ERROR_DATA; + p->range = 0xffffffff; + } + // else + { + unsigned state = p->state; + // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop + if (BCJ2_IS_32BIT_STREAM(state)) + { + const Byte *cur = p->bufs[state]; + if (cur == p->lims[state]) + return SZ_OK; + p->bufs[state] = cur + 4; + { + const UInt32 ip = p->ip + 4; + v = GetBe32a(cur) - ip; + p->ip = ip; + } + state = BCJ2_DEC_STATE_ORIG_0; + } + if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4) + { + Byte *dest = p->dest; + for (;;) + { + if (dest == p->destLim) + { + p->state = state; + p->temp = v; + return SZ_OK; + } + *dest++ = (Byte)v; + p->dest = dest; + if (++state == BCJ2_DEC_STATE_ORIG_3 + 1) + break; + v >>= 8; + } + } + } + + // src = p->bufs[BCJ2_STREAM_MAIN]; + for (;;) + { + /* + if (BCJ2_IS_32BIT_STREAM(p->state)) + p->state = BCJ2_DEC_STATE_OK; + else + */ + { + if (p->range < kTopValue) + { + if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) + { + p->state = BCJ2_STREAM_RC; + p->temp = v; + return SZ_OK; + } + p->range <<= 8; + p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + } + { + const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; + const Byte *srcLim; + Byte *dest = p->dest; + { + const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); + SizeT num = (SizeT)(p->destLim - dest); + if (num >= rem) + num = rem; + #define NUM_ITERS 4 + #if (NUM_ITERS & (NUM_ITERS - 1)) == 0 + num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x)) + #else + num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x)) + #endif + srcLim = src + num; + } + + #define NUM_SHIFT_BITS 24 + #define ONE_ITER(indx) { \ + const unsigned b = src[indx]; \ + *dest++ = (Byte)b; \ + v = (v << NUM_SHIFT_BITS) | b; \ + if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ + if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ + ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ + /* ++dest */; /* v = b; */ } + + if (src != srcLim) + for (;;) + { + /* The dependency chain of 2-cycle for (v) calculation is not big problem here. + But we can remove dependency chain with v = b in the end of loop. 
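The two "break" tests in ONE_ITER detect a potential branch marker: the first fires when
the byte just copied is E8 (call) or E9 (jmp), since ((b + 0x18) & 0xFE) == 0 only for
those two values; the second fires for the two-byte conditional-jump opcodes 0F 8x,
comparing the previous byte (still held in the top 8 bits of v) against 0F and the
current byte against the 80..8F range.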
*/ + ONE_ITER(0) + #if (NUM_ITERS > 1) + ONE_ITER(1) + #if (NUM_ITERS > 2) + ONE_ITER(2) + #if (NUM_ITERS > 3) + ONE_ITER(3) + #if (NUM_ITERS > 4) + ONE_ITER(4) + #if (NUM_ITERS > 5) + ONE_ITER(5) + #if (NUM_ITERS > 6) + ONE_ITER(6) + #if (NUM_ITERS > 7) + ONE_ITER(7) + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + src += NUM_ITERS; + if (src == srcLim) + break; + } + + if (src == srcLim) + #if (NUM_ITERS > 1) + for (;;) + #endif + { + #if (NUM_ITERS > 1) + if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim) + #endif + { + const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); + p->bufs[BCJ2_STREAM_MAIN] = src; + p->dest = dest; + p->ip += (UInt32)num; + /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ + p->state = + src == p->lims[BCJ2_STREAM_MAIN] ? + (unsigned)BCJ2_STREAM_MAIN : + (unsigned)BCJ2_DEC_STATE_ORIG; + p->temp = v; + return SZ_OK; + } + #if (NUM_ITERS > 1) + ONE_ITER(0) + src++; + #endif + } + + { + const SizeT num = (SizeT)(dest - p->dest); + p->dest = dest; // p->dest += num; + p->bufs[BCJ2_STREAM_MAIN] += num; // = src; + p->ip += (UInt32)num; + } + { + UInt32 bound, ttt; + CBcj2Prob *prob; // unsigned index; + /* + prob = p->probs + (unsigned)((Byte)v == 0xe8 ? + 2 + (Byte)(v >> 8) : + ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1)); + */ + { + const unsigned c = ((v + 0x17) >> 6) & 1; + prob = p->probs + (unsigned) + (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); + // (Byte) + // 8x->0 : e9->1 : xxe8->xx+2 + // 8x->0x100 : e9->0x101 : xxe8->xx + // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v)); + // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v)); + } + ttt = *prob; + bound = (p->range >> kNumBitModelTotalBits) * ttt; + if (p->code < bound) + { + // bcj2_stats[prob - p->probs][0]++; + p->range = bound; + *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + continue; + } + { + // bcj2_stats[prob - p->probs][1]++; + p->range -= bound; + p->code -= bound; + *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); + } + } + } + } + { + /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */ + // const unsigned cj = ((Byte)v == 0xe8) ? 
BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; + const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; + const Byte *cur = p->bufs[cj]; + Byte *dest; + SizeT rem; + if (cur == p->lims[cj]) + { + p->state = cj; + break; + } + v = GetBe32a(cur); + p->bufs[cj] = cur + 4; + { + const UInt32 ip = p->ip + 4; + v -= ip; + p->ip = ip; + } + dest = p->dest; + rem = (SizeT)(p->destLim - dest); + if (rem < 4) + { + if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8; + if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8; + if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}} + p->temp = v; + p->dest = dest + rem; + p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; + break; + } + SetUi32(dest, v) + v >>= 24; + p->dest = dest + 4; + } + } + + if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC]) + { + p->range <<= 8; + p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + } + return SZ_OK; +} + +#undef NUM_ITERS +#undef ONE_ITER +#undef NUM_SHIFT_BITS +#undef kTopValue +#undef kNumBitModelTotalBits +#undef kBitModelTotal +#undef kNumMoveBits diff --git a/src/core/fex/7z_C/Bcj2.h b/src/core/fex/7z_C/Bcj2.h index f642e741..2eda0c82 100644 --- a/src/core/fex/7z_C/Bcj2.h +++ b/src/core/fex/7z_C/Bcj2.h @@ -1,38 +1,332 @@ -/* Bcj2.h -- Converter for x86 code (BCJ2) -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __BCJ2_H -#define __BCJ2_H - -#include "Types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* -Conditions: - outSize <= FullOutputSize, - where FullOutputSize is full size of output stream of x86_2 filter. - -If buf0 overlaps outBuf, there are two required conditions: - 1) (buf0 >= outBuf) - 2) (buf0 + size0 >= outBuf + FullOutputSize). - -Returns: - SZ_OK - SZ_ERROR_DATA - Data error -*/ - -int Bcj2_Decode( - const Byte* buf0, SizeT size0, - const Byte* buf1, SizeT size1, - const Byte* buf2, SizeT size2, - const Byte* buf3, SizeT size3, - Byte* outBuf, SizeT outSize); - -#ifdef __cplusplus -} -#endif - -#endif +/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2) +2023-03-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_BCJ2_H +#define ZIP7_INC_BCJ2_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define BCJ2_NUM_STREAMS 4 + +enum +{ + BCJ2_STREAM_MAIN, + BCJ2_STREAM_CALL, + BCJ2_STREAM_JUMP, + BCJ2_STREAM_RC +}; + +enum +{ + BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS, + BCJ2_DEC_STATE_ORIG_1, + BCJ2_DEC_STATE_ORIG_2, + BCJ2_DEC_STATE_ORIG_3, + + BCJ2_DEC_STATE_ORIG, + BCJ2_DEC_STATE_ERROR /* after detected data error */ +}; + +enum +{ + BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS, + BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */ +}; + + +/* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */ +#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2) + +/* +CBcj2Dec / CBcj2Enc +bufs sizes: + BUF_SIZE(n) = lims[n] - bufs[n] +bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be multiply of 4: + (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0 + (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0 +*/ + +// typedef UInt32 CBcj2Prob; +typedef UInt16 CBcj2Prob; + +/* +BCJ2 encoder / decoder internal requirements: + - If last bytes of stream contain marker (e8/e8/0f8x), then + there is also encoded symbol (0 : no conversion) in RC stream. + - One case of overlapped instructions is supported, + if last byte of converted instruction is (0f) and next byte is (8x): + marker [xx xx xx 0f] 8x + then the pair (0f 8x) is treated as marker. 
+*/ + +/* ---------- BCJ2 Decoder ---------- */ + +/* +CBcj2Dec: +(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: + bufs[BCJ2_STREAM_MAIN] >= dest && + bufs[BCJ2_STREAM_MAIN] - dest >= + BUF_SIZE(BCJ2_STREAM_CALL) + + BUF_SIZE(BCJ2_STREAM_JUMP) + reserve = bufs[BCJ2_STREAM_MAIN] - dest - + ( BUF_SIZE(BCJ2_STREAM_CALL) + + BUF_SIZE(BCJ2_STREAM_JUMP) ) + and additional conditions: + if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve != 1) : if (ver < v23.00) + } + else // if there are more than one calls of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve >= 6) : if (ver < v23.00) + (reserve >= 4) : if (ver >= v23.00) + We need that (reserve) because after first call of Bcj2Dec_Decode(), + CBcj2Dec::temp can contain up to 4 bytes for writing to (dest). + } + (reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode(). + (reserve == 0) also is allowed in case of multi-call, if we use fixed buffers, + and (reserve) is calculated from full (final) sizes of all streams before first call. +*/ + +typedef struct +{ + const Byte *bufs[BCJ2_NUM_STREAMS]; + const Byte *lims[BCJ2_NUM_STREAMS]; + Byte *dest; + const Byte *destLim; + + unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ + + UInt32 ip; /* property of starting base for decoding */ + UInt32 temp; /* Byte temp[4]; */ + UInt32 range; + UInt32 code; + CBcj2Prob probs[2 + 256]; +} CBcj2Dec; + + +/* Note: + Bcj2Dec_Init() sets (CBcj2Dec::ip = 0) + if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init() +*/ +void Bcj2Dec_Init(CBcj2Dec *p); + + +/* Bcj2Dec_Decode(): + returns: + SZ_OK + SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct +*/ +SRes Bcj2Dec_Decode(CBcj2Dec *p); + +/* To check that decoding was finished you can compare + sizes of processed streams with sizes known from another sources. + You must do at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. + additional optional checks: + - the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC) + - the checks Bcj2Dec_IsMaybeFinished*() + also before actual decoding you can check that the + following condition is met for stream sizes: + ( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) ) +*/ + +/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for + additional input data in BCJ2_STREAM_MAIN stream. + Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN) + +/* if the stream decoding was finished correctly, then range decoder + part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0). + Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0) + +/* use Bcj2Dec_IsMaybeFinished() only as additional check + after at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. 
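+
+  Caller-side sketch (illustrative only, not from the original sources;
+  (dec) is a CBcj2Dec, (origBuf) / (origSize) are hypothetical values known
+  to the caller from other sources):
+
+    const SizeT writtenOrig = (SizeT)(dec.dest - origBuf);
+    const int finished = (writtenOrig == origSize)      // mandatory size check
+                      && Bcj2Dec_IsMaybeFinished(&dec); // optional extra check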
+*/ +#define Bcj2Dec_IsMaybeFinished(_p_) ( \ + Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \ + Bcj2Dec_IsMaybeFinished_code(_p_)) + + + +/* ---------- BCJ2 Encoder ---------- */ + +typedef enum +{ + BCJ2_ENC_FINISH_MODE_CONTINUE, + BCJ2_ENC_FINISH_MODE_END_BLOCK, + BCJ2_ENC_FINISH_MODE_END_STREAM +} EBcj2Enc_FinishMode; + +/* + BCJ2_ENC_FINISH_MODE_CONTINUE: + process non finished encoding. + It notifies the encoder that additional further calls + can provide more input data (src) than provided by current call. + In that case the CBcj2Enc encoder still can move (src) pointer + up to (srcLim), but CBcj2Enc encoder can store some of the last + processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer. + at return: + (CBcj2Enc::src will point to position that includes + processed data and data copied to (temp[]) buffer) + That data from (temp[]) buffer will be used in further calls. + + BCJ2_ENC_FINISH_MODE_END_BLOCK: + finish encoding of current block (ended at srcLim) without RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG) && + CBcj2Enc::src == CBcj2Enc::srcLim) + : it shows that block encoding was finished. And the encoder is + ready for new (src) data or for stream finish operation. + finished block means + { + CBcj2Enc has completed block encoding up to (srcLim). + (1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP cortages will + not cross block boundary at (srcLim). + temporary CBcj2Enc buffer for (ORIG) src data is empty. + 3 output uncompressed streams (MAIN, CALL, JUMP) were flushed. + RC stream was not flushed. And RC stream will cross block boundary. + } + Note: some possible implementation of BCJ2 encoder could + write branch marker (e8/e8/0f8x) in one call of Bcj2Enc_Encode(), + and it could calculate symbol for RC in another call of Bcj2Enc_Encode(). + BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol. + And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls. + So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK + to ensure that RC symbol is calculated and written in proper block. + + BCJ2_ENC_FINISH_MODE_END_STREAM + finish encoding of stream (ended at srcLim) fully including RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED) + : it shows that stream encoding was finished fully, + and all output streams were flushed fully. + also Bcj2Enc_IsFinished() can be called. +*/ + + +/* + 32-bit relative offset in JUMP/CALL commands is + - (mod 4 GiB) for 32-bit x86 code + - signed Int32 for 64-bit x86-64 code + BCJ2 encoder also does internal relative to absolute address conversions. + And there are 2 possible ways to do it: + before v23: we used 32-bit variables and (mod 4 GiB) conversion + since v23: we use 64-bit variables and (signed Int32 offset) conversion. + The absolute address condition for conversion in v23: + ((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64) + note that if (fileSize64 > 2 GiB). there is difference between + old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23). + And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases. 
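+
+  Worked example (the numbers are made up, illustrative only): with
+  fileIp64 = 0, fileSize64 = 0x1000, ip64 = 0x100 and offset = 0x200, the
+  tested value is 0x100 - 0 + 5 + 0x200 = 0x305 < 0x1000, so the condition
+  holds and the offset is converted.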
+*/ + +/* +// for old (v22) way for conversion: +typedef UInt32 CBcj2Enc_ip_unsigned; +typedef Int32 CBcj2Enc_ip_signed; +#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31) +*/ +typedef UInt64 CBcj2Enc_ip_unsigned; +typedef Int64 CBcj2Enc_ip_signed; + +/* maximum size of file that can be used for conversion condition */ +#define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2) + +/* default value of fileSize64_minus1 variable that means + that absolute address limitation will not be used */ +#define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1) + +/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */ +#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \ + ((CBcj2Enc_ip_unsigned)(fileSize) - 1) + +/* set CBcj2Enc::fileSize64_minus1 variable from size of file */ +#define Bcj2Enc_SET_FileSize(p, fileSize) \ + (p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize); + + +typedef struct +{ + Byte *bufs[BCJ2_NUM_STREAMS]; + const Byte *lims[BCJ2_NUM_STREAMS]; + const Byte *src; + const Byte *srcLim; + + unsigned state; + EBcj2Enc_FinishMode finishMode; + + Byte context; + Byte flushRem; + Byte isFlushState; + + Byte cache; + UInt32 range; + UInt64 low; + UInt64 cacheSize; + + // UInt32 context; // for marker version, it can include marker flag. + + /* (ip64) and (fileIp64) correspond to virtual source stream position + that doesn't include data in temp[] */ + CBcj2Enc_ip_unsigned ip64; /* current (ip) position */ + CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */ + CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */ + UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */ + // UInt32 relatExcludeBits; + + UInt32 tempTarget; + unsigned tempPos; /* the number of bytes that were copied to temp[] buffer + (tempPos <= 4) outside of Bcj2Enc_Encode() */ + // Byte temp[4]; // for marker version + Byte temp[8]; + CBcj2Prob probs[2 + 256]; +} CBcj2Enc; + +void Bcj2Enc_Init(CBcj2Enc *p); + + +/* +Bcj2Enc_Encode(): at exit: + p->State < BCJ2_NUM_STREAMS : we need more buffer space for output stream + (bufs[p->State] == lims[p->State]) + p->State == BCJ2_ENC_STATE_ORIG : we need more data in input src stream + (src == srcLim) + p->State == BCJ2_ENC_STATE_FINISHED : after fully encoded stream +*/ +void Bcj2Enc_Encode(CBcj2Enc *p); + +/* Bcj2Enc encoder can look ahead for up 4 bytes of source stream. + CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer. + (CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after + fully processed data and after data copied to temp buffer. + So if the caller needs to get real number of fully processed input + bytes (without look ahead data in temp buffer), + the caller must subtruct (CBcj2Enc::tempPos) value from processed size + value that is calculated based on current (CBcj2Enc::src): + cur_processed_pos = Calc_Big_Processed_Pos(enc.src)) - + Bcj2Enc_Get_AvailInputSize_in_Temp(&enc); +*/ +/* get the size of input data that was stored in temp[] buffer: */ +#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos) + +#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0) + +/* Note : the decoder supports overlapping of marker (0f 80). 
+ But we can eliminate such overlapping cases by setting + the limit for relative offset conversion as + CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB) +*/ +/* default value for CBcj2Enc::relatLimit */ +#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24) +#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31) +// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5 + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Bra.c b/src/core/fex/7z_C/Bra.c index 0716842a..818cc78b 100644 --- a/src/core/fex/7z_C/Bra.c +++ b/src/core/fex/7z_C/Bra.c @@ -1,112 +1,709 @@ -/* Bra.c -- Converters for RISC code -2010-04-16 : Igor Pavlov : Public domain */ - -#include "Bra.h" - -SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) -{ - SizeT i; - if (size < 4) - return 0; - size -= 4; - ip += 8; - for (i = 0; i <= size; i += 4) { - if (data[i + 3] == 0xEB) { - UInt32 dest; - UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]); - src <<= 2; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - dest >>= 2; - data[i + 2] = (Byte)(dest >> 16); - data[i + 1] = (Byte)(dest >> 8); - data[i + 0] = (Byte)dest; - } - } - return i; -} - -SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) -{ - SizeT i; - if (size < 4) - return 0; - size -= 4; - ip += 4; - for (i = 0; i <= size; i += 2) { - if ((data[i + 1] & 0xF8) == 0xF0 && (data[i + 3] & 0xF8) == 0xF8) { - UInt32 dest; - UInt32 src = (((UInt32)data[i + 1] & 0x7) << 19) | ((UInt32)data[i + 0] << 11) | (((UInt32)data[i + 3] & 0x7) << 8) | (data[i + 2]); - - src <<= 1; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - dest >>= 1; - - data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7)); - data[i + 0] = (Byte)(dest >> 11); - data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7)); - data[i + 2] = (Byte)dest; - i += 2; - } - } - return i; -} - -SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) -{ - SizeT i; - if (size < 4) - return 0; - size -= 4; - for (i = 0; i <= size; i += 4) { - if ((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1) { - UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) | ((UInt32)data[i + 1] << 16) | ((UInt32)data[i + 2] << 8) | ((UInt32)data[i + 3] & (~3)); - - UInt32 dest; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - data[i + 0] = (Byte)(0x48 | ((dest >> 24) & 0x3)); - data[i + 1] = (Byte)(dest >> 16); - data[i + 2] = (Byte)(dest >> 8); - data[i + 3] &= 0x3; - data[i + 3] |= dest; - } - } - return i; -} - -SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) -{ - UInt32 i; - if (size < 4) - return 0; - size -= 4; - for (i = 0; i <= size; i += 4) { - if ((data[i] == 0x40 && (data[i + 1] & 0xC0) == 0x00) || (data[i] == 0x7F && (data[i + 1] & 0xC0) == 0xC0)) { - UInt32 src = ((UInt32)data[i + 0] << 24) | ((UInt32)data[i + 1] << 16) | ((UInt32)data[i + 2] << 8) | ((UInt32)data[i + 3]); - UInt32 dest; - - src <<= 2; - if (encoding) - dest = ip + i + src; - else - dest = src - (ip + i); - dest >>= 2; - - dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000; - - data[i + 0] = (Byte)(dest >> 24); - data[i + 1] = (Byte)(dest >> 16); - data[i + 2] = (Byte)(dest >> 8); - data[i + 3] = (Byte)dest; - } - } - return i; -} +/* Bra.c -- Branch converters for RISC code +2024-01-20 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bra.h" +#include "RotateDefs.h" +#include "CpuArch.h" + +#if 
defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV(name) z7_ ## name + +#define Z7_BRANCH_FUNC_MAIN(name) \ +static \ +Z7_FORCE_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding) + +#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ + { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \ + +#ifdef Z7_EXTRACT_ONLY +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) +#else +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1) +#endif + +#if defined(__clang__) +#define BR_EXTERNAL_FOR +#define BR_NEXT_ITERATION continue; +#else +#define BR_EXTERNAL_FOR for (;;) +#define BR_NEXT_ITERATION break; +#endif + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + // GCC is not good for __builtin_expect() here + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // #define Z7_unlikely [[unlikely]] + // #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_likely [[likely]] +#else + // #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64) +{ + // Byte *p = data; + const Byte *lim; + const UInt32 flag = (UInt32)1 << (24 - 4); + const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); + size &= ~(SizeT)3; + // if (size == 0) return p; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + BR_EXTERNAL_FOR + { + // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (;;) + { + UInt32 v; + if Z7_UNLIKELY(p == lim) + return p; + v = GetUi32a(p); + p += 4; + if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0) + { + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x03ffffff; + v |= 0x94000000; + SetUi32a(p - 4, v) + BR_NEXT_ITERATION + } + // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0) + v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0) + { + UInt32 z, c; + // v = rotrFixed(v, 8); + v += flag; if Z7_UNLIKELY(v & mask) continue; + z = (v & 0xffffffe0) | (v >> 26); + c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; + BR_CONVERT_VAL(z, c) + v &= 0x1f; + v |= 0x90000000; + v |= z << 26; + v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); + SetUi32a(p - 4, v) + } + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64) + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. 
+ (p) will point to next instruction */ + pc += 8 - 4; + + for (;;) + { + for (;;) + { + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + } + { + UInt32 v = GetUi32a(p - 4); + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x00ffffff; + v |= 0xeb000000; + SetUi32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM) + + +Z7_BRANCH_FUNC_MAIN(BranchConv_PPC) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + for (;;) + { + UInt32 v; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + // v = GetBe32a(p); + v = *(UInt32 *)(void *)p; + p += 4; + // if ((v & 0xfc000003) == 0x48000001) break; + // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break; + if Z7_UNLIKELY( + ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001)) + & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break; + } + { + v = Z7_CONV_NATIVE_TO_BE_32(v); + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= 0x03ffffff; + v |= 0x48000000; + SetBe32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_PPC) + + +#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED +#define BR_SPARC_USE_ROTATE +#endif + +Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC) +{ + // Byte *p = data; + const Byte *lim; + const UInt32 flag = (UInt32)1 << 22; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + for (;;) + { + UInt32 v; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + /* // the code without GetBe32a(): + { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; } + */ + v = GetBe32a(p); + p += 4; + #ifdef BR_SPARC_USE_ROTATE + v = rotlFixed(v, 2); + v += (flag << 2) - 1; + if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0) + #else + v += (UInt32)5 << 29; + v ^= (UInt32)7 << 29; + v += flag; + if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0) + #endif + break; + } + { + // UInt32 v = GetBe32a(p - 4); + #ifndef BR_SPARC_USE_ROTATE + v <<= 2; + #endif + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= (flag << 3) - 1; + #ifdef BR_SPARC_USE_ROTATE + v -= (flag << 2) - 1; + v = rotrFixed(v, 2); + #else + v -= (flag << 2); + v >>= 2; + v |= (UInt32)1 << 30; + #endif + SetBe32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC) + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT) +{ + // Byte *p = data; + Byte *lim; + size &= ~(SizeT)1; + // if (size == 0) return p; + if (size <= 2) return p; + size -= 2; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to the +2 instructions from current instruction */ + // pc += 4 - 4; + // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1; + // #define ARMT_TAIL_PROC { goto armt_tail; } + #define ARMT_TAIL_PROC { return p; } + + do + { + /* in MSVC 32-bit x86 compilers: + UInt32 version : it loads value from memory with movzx + Byte version : it loads value to 8-bit register (AL/CL) + movzx version is slightly faster in some cpus + */ + unsigned b1; + // Byte / unsigned + b1 = p[1]; + // optimized version to reduce one (p >= lim) check: + // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8) + for (;;) + { + unsigned b3; // Byte / UInt32 + /* (Byte)(b3) normalization can use low byte computations in MSVC. + It gives smaller code, and no loss of speed in some compilers/cpus. 
+ But new MSVC 32-bit x86 compilers use more slow load + from memory to low byte register in that case. + So we try to use full 32-bit computations for faster code. + */ + // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break; + } + { + /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation. + But gcc/clang for arm64 can use bfi instruction for full code here */ + UInt32 v = + ((UInt32)GetUi16a(p - 2) << 11) | + ((UInt32)GetUi16a(p) & 0x7FF); + /* + UInt32 v = + ((UInt32)p[1 - 2] << 19) + + (((UInt32)p[1] & 0x7) << 8) + + (((UInt32)p[-2] << 11)) + + (p[0]); + */ + p += 2; + { + UInt32 c = BR_PC_GET >> 1; + BR_CONVERT_VAL(v, c) + } + SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000)) + SetUi16a(p - 2, (UInt16)(v | 0xf800)) + /* + p[-4] = (Byte)(v >> 11); + p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7)); + p[-2] = (Byte)v; + p[-1] = (Byte)(0xf8 | (v >> 8)); + */ + } + } + while (p < lim); + return p; + // armt_tail: + // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim; + // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2)); + // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); + // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); +} +Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT) + + +// #define BR_IA64_NO_INLINE + +Z7_BRANCH_FUNC_MAIN(BranchConv_IA64) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)15; + lim = p + size; + pc -= 1 << 4; + pc >>= 4 - 1; + // pc -= 1 << 1; + + for (;;) + { + unsigned m; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e)); + p += 16; + pc += 1 << 1; + if (m &= 3) + break; + } + { + p += (ptrdiff_t)m * 5 - 20; // negative value is expected here. 
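+      /* added note: (p) was advanced by 16 above, so this lands at byte offset
+         (m * 5 - 4) inside the current 16-byte bundle (m is 1, 2 or 3); the
+         loop below then scans 5-byte windows, stepping (p) by 5 and (m) by 1. */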
+ do + { + const UInt32 t = + #if defined(MY_CPU_X86_OR_AMD64) + // we use 32-bit load here to reduce code size on x86: + GetUi32(p); + #else + GetUi16(p); + #endif + UInt32 z = GetUi32(p + 1) >> m; + p += 5; + if (((t >> m) & (0x70 << 1)) == 0 + && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0) + { + UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; + z ^= v; + #ifdef BR_IA64_NO_INLINE + v |= (v & ((UInt32)1 << (23 + 1))) >> 3; + { + UInt32 c = pc; + BR_CONVERT_VAL(v, c) + } + v &= (0x1fffff << 1) | 1; + #else + { + if (encoding) + { + // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits + pc &= (0x1fffff << 1) | 1; + v += pc; + } + else + { + // pc |= 0xc00000 << 1; // we need to set at least 2 bits + pc |= ~(UInt32)((0x1fffff << 1) | 1); + v -= pc; + } + } + v &= ~(UInt32)(0x600000 << 1); + #endif + v += (0x700000 << 1); + v &= (0x8fffff << 1) | 1; + z |= v; + z <<= m; + SetUi32(p + 1 - 5, z) + } + m++; + } + while (m &= 3); // while (m < 4); + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_IA64) + + +#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET; +#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET; + +#if 1 && defined(MY_CPU_LE_UNALIGN) + #define RISCV_USE_UNALIGNED_LOAD +#endif + +#ifdef RISCV_USE_UNALIGNED_LOAD + #define RISCV_GET_UI32(p) GetUi32(p) + #define RISCV_SET_UI32(p, v) { SetUi32(p, v) } +#else + #define RISCV_GET_UI32(p) \ + ((UInt32)GetUi16a(p) + \ + ((UInt32)GetUi16a((p) + 2) << 16)) + #define RISCV_SET_UI32(p, v) { \ + SetUi16a(p, (UInt16)(v)) \ + SetUi16a((p) + 2, (UInt16)(v >> 16)) } +#endif + +#if 1 && defined(MY_CPU_LE) + #define RISCV_USE_16BIT_LOAD +#endif + +#ifdef RISCV_USE_16BIT_LOAD + #define RISCV_LOAD_VAL(p) GetUi16a(p) +#else + #define RISCV_LOAD_VAL(p) (*(p)) +#endif + +#define RISCV_INSTR_SIZE 2 +#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE) +#define RISCV_STEP_2 4 +#define RISCV_REG_VAL (2 << 7) +#define RISCV_CMD_VAL 3 +#if 1 + // for code size optimization: + #define RISCV_DELTA_7F 0x7f +#else + #define RISCV_DELTA_7F 0 +#endif + +#define RISCV_CHECK_1(v, b) \ + (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0) + +#if 1 + #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \ + << 18) \ + < ((r) & 0x1d)) +#else + // this branch gives larger code, because + // compilers generate larger code for big constants. 
+ #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + < ((r) & 0x1d)) +#endif + + +#define RISCV_SCAN_LOOP \ + Byte *lim; \ + size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \ + if (size <= 6) return p; \ + size -= 6; \ + lim = p + size; \ + BR_PC_INIT \ + for (;;) \ + { \ + UInt32 a, v; \ + /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \ + for (;;) \ + { \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \ + if ((a & 0x77) == 0) break; \ + a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \ + p += RISCV_INSTR_SIZE * 2; \ + if ((a & 0x77) == 0) \ + { \ + p -= RISCV_INSTR_SIZE; \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + break; \ + } \ + } +// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL +// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL +// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC +// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC + +Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP + v = a; + a = RISCV_GET_UI32(p); +#ifndef RISCV_USE_16BIT_LOAD + v += (UInt32)p[1] << 8; +#endif + + if ((v & 8) == 0) // JAL + { + if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + v = ((a & 1u << 31) >> 11) + | ((a & 0x3ff << 21) >> 20) + | ((a & 1 << 20) >> 9) + | (a & 0xff << 12); + BR_CONVERT_VAL_ENC(v) + // ((v & 1) == 0) + // v: bits [1 : 20] contain offset bits +#if 0 && defined(RISCV_USE_UNALIGNED_LOAD) + a &= 0xfff; + a |= ((UInt32)(v << 23)) + | ((UInt32)(v << 7) & ((UInt32)0xff << 16)) + | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8)); + RISCV_SET_UI32(p, a) +#else // aligned +#if 0 + SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff))) +#else + p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf)); +#endif + +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v <<= 15; + v = Z7_BSWAP32(v); + SetUi16a(p + 2, (UInt16)v) +#else + p[2] = (Byte)(v >> 9); + p[3] = (Byte)(v >> 1); +#endif +#endif // aligned + } + p += 4; + continue; + } // JAL + + { + // AUIPC + if (v & 0xe80) // (not x0) and (not x2) + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (RISCV_CHECK_1(v, b)) + { + { + const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, temp) + } + a &= 0xfffff000; + { +#if 1 + const int t = -1 >> 1; + if (t != -1) + a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation + else +#endif + a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension). 
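+          /* added note: both branches add the same value: when bit 31 of (b)
+             is set, subtracting (1 << 12) from the logical-shift result
+             (b >> 20) equals ((Int32)b >> 20) modulo 2^32. */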
+ } + BR_CONVERT_VAL_ENC(a) +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + a = Z7_BSWAP32(a); + RISCV_SET_UI32(p + 4, a) +#else + SetBe32(p + 4, a) +#endif + p += 8; + } + else + p += RISCV_STEP_1; + } + else + { + UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + v = RISCV_GET_UI32(p + 4); + r = (r << 7) + 0x17 + (v & 0xfffff000); + a = (a >> 12) | (v << 20); + RISCV_SET_UI32(p, r) + RISCV_SET_UI32(p + 4, a) + p += 8; + } + else + p += RISCV_STEP_2; + } + } + } // for +} + + +Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP +#ifdef RISCV_USE_16BIT_LOAD + if ((a & 8) == 0) + { +#else + v = a; + a += (UInt32)p[1] << 8; + if ((v & 8) == 0) + { +#endif + // JAL + a -= 0x100 - RISCV_DELTA_7F; + if (a & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff; +#if 0 // unaligned + a = GetUi32(p); + v = (UInt32)(a >> 23) & ((UInt32)0xff << 1) + | (UInt32)(a >> 7) & ((UInt32)0xff << 9) +#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v = GetUi16a(p + 2); + v = Z7_BSWAP32(v) >> 15 +#else + v = (UInt32)p[3] << 1 + | (UInt32)p[2] << 9 +#endif + | (UInt32)((a & 0xf000) << 5); + BR_CONVERT_VAL_DEC(v) + a = a_old + | (v << 11 & 1u << 31) + | (v << 20 & 0x3ff << 21) + | (v << 9 & 1 << 20) + | (v & 0xff << 12); + RISCV_SET_UI32(p, a) + } + p += 4; + continue; + } // JAL + + { + // AUIPC + v = a; +#if 1 && defined(RISCV_USE_UNALIGNED_LOAD) + a = GetUi32(p); +#else + a |= (UInt32)GetUi16a(p + 2) << 16; +#endif + if ((v & 0xe80) == 0) // x0/x2 + { + const UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + UInt32 b; +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + b = RISCV_GET_UI32(p + 4); + b = Z7_BSWAP32(b); +#else + b = GetBe32(p + 4); +#endif + v = a >> 12; + BR_CONVERT_VAL_DEC(b) + a = (r << 7) + 0x17; + a += (b + 0x800) & 0xfffff000; + v |= b << 20; + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + else + p += RISCV_STEP_2; + } + else + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (!RISCV_CHECK_1(v, b)) + p += RISCV_STEP_1; + else + { + v = (a & 0xfffff000) | (b >> 20); + a = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + } + } + } // for +} diff --git a/src/core/fex/7z_C/Bra.h b/src/core/fex/7z_C/Bra.h index 3bc1edc9..060ee170 100644 --- a/src/core/fex/7z_C/Bra.h +++ b/src/core/fex/7z_C/Bra.h @@ -1,71 +1,105 @@ -/* Bra.h -- Branch converters for executables -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __BRA_H -#define __BRA_H - -#include "Types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* -These functions convert relative addresses to absolute addresses -in CALL instructions to increase the compression ratio. - - In: - data - data buffer - size - size of data - ip - current virtual Instruction Pinter (IP) value - state - state variable for x86 converter - encoding - 0 (for decoding), 1 (for encoding) - - Out: - state - state variable for x86 converter - - Returns: - The number of processed bytes. If you call these functions with multiple calls, - you must start next call with first byte after block of processed bytes. - - Type Endian Alignment LookAhead - - x86 little 1 4 - ARMT little 2 2 - ARM little 4 0 - PPC big 4 0 - SPARC big 4 0 - IA64 little 16 0 - - size must be >= Alignment + LookAhead, if it's not last block. - If (size < Alignment + LookAhead), converter returns 0. 
- - Example: - - UInt32 ip = 0; - for () - { - ; size must be >= Alignment + LookAhead, if it's not last block - SizeT processed = Convert(data, size, ip, 1); - data += processed; - size -= processed; - ip += processed; - } -*/ - -#define x86_Convert_Init(state) \ - { \ - state = 0; \ - } -SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding); -SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); - -#ifdef __cplusplus -} -#endif - -#endif +/* Bra.h -- Branch converters for executables +2024-01-20 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_BRA_H +#define ZIP7_INC_BRA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define PPC BAD_PPC_11 // for debug */ + +#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec +#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc +#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name) +#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name) +#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec +#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc + +#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc) +#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state) + +typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv)); +typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt)); + +#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0 +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86)); +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86)); + +#define Z7_BRANCH_FUNCS_DECL(name) \ +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \ +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name)); + +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT) +Z7_BRANCH_FUNCS_DECL (BranchConv_PPC) +Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC) +Z7_BRANCH_FUNCS_DECL (BranchConv_IA64) +Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV) + +/* +These functions convert data that contain CPU instructions. +Each such function converts relative addresses to absolute addresses in some +branch instructions: CALL (in all converters) and JUMP (X86 converter only). +Such conversion allows to increase compression ratio, if we compress that data. + +There are 2 types of converters: + Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc); + Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state); +Each Converter supports 2 versions: one for encoding +and one for decoding (_Enc/_Dec postfixes in function name). + +In params: + data : data buffer + size : size of data + pc : current virtual Program Counter (Instruction Pointer) value +In/Out param: + state : pointer to state variable (for X86 converter only) + +Return: + The pointer to position in (data) buffer after last byte that was processed. + If the caller calls converter again, it must call it starting with that position. + But the caller is allowed to move data in buffer. So pointer to + current processed position also will be changed for next call. + Also the caller must increase internal (pc) value for next call. + +Each converter has some characteristics: Endian, Alignment, LookAhead. 
+ Type Endian Alignment LookAhead + + X86 little 1 4 + ARMT little 2 2 + RISCV little 2 6 + ARM little 4 0 + ARM64 little 4 0 + PPC big 4 0 + SPARC big 4 0 + IA64 little 16 0 + + (data) must be aligned for (Alignment). + processed size can be calculated as: + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0) + it means that converter needs more data for processing. + If (size < Alignment + LookAhead) + then (processed == 0) is allowed. + +Example code for conversion in loop: + UInt32 pc = 0; + size = 0; + for (;;) + { + size += Load_more_input_data(data + size); + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0 && no_more_input_data_after_size) + break; // we stop convert loop + data += processed; + size -= processed; + pc += processed; + } +*/ + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Bra86.c b/src/core/fex/7z_C/Bra86.c index a266e92f..9c468ba7 100644 --- a/src/core/fex/7z_C/Bra86.c +++ b/src/core/fex/7z_C/Bra86.c @@ -1,77 +1,187 @@ -/* Bra86.c -- Converter for x86 code (BCJ) -2008-10-04 : Igor Pavlov : Public domain */ - -#include "Bra.h" - -#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) - -const Byte kMaskToAllowedStatus[8] = { 1, 1, 1, 0, 1, 0, 0, 0 }; -const Byte kMaskToBitNumber[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; - -SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding) -{ - SizeT bufferPos = 0, prevPosT; - UInt32 prevMask = *state & 0x7; - if (size < 5) - return 0; - ip += 5; - prevPosT = (SizeT)0 - 1; - - for (;;) { - Byte* p = data + bufferPos; - Byte* limit = data + size - 4; - for (; p < limit; p++) - if ((*p & 0xFE) == 0xE8) - break; - bufferPos = (SizeT)(p - data); - if (p >= limit) - break; - prevPosT = bufferPos - prevPosT; - if (prevPosT > 3) - prevMask = 0; - else { - prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7; - if (prevMask != 0) { - Byte b = p[4 - kMaskToBitNumber[prevMask]]; - if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b)) { - prevPosT = bufferPos; - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; - continue; - } - } - } - prevPosT = bufferPos; - - if (Test86MSByte(p[4])) { - UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); - UInt32 dest; - for (;;) { - Byte b; - int index; - if (encoding) - dest = (ip + (UInt32)bufferPos) + src; - else - dest = src - (ip + (UInt32)bufferPos); - if (prevMask == 0) - break; - index = kMaskToBitNumber[prevMask] * 8; - b = (Byte)(dest >> (24 - index)); - if (!Test86MSByte(b)) - break; - src = dest ^ ((1 << (32 - index)) - 1); - } - p[4] = (Byte)(~(((dest >> 24) & 1) - 1)); - p[3] = (Byte)(dest >> 16); - p[2] = (Byte)(dest >> 8); - p[1] = (Byte)dest; - bufferPos += 5; - } else { - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; - } - } - prevPosT = bufferPos - prevPosT; - *state = ((prevPosT > 3) ? 
0 : ((prevMask << ((int)prevPosT - 1)) & 0x7)); - return bufferPos; -} +/* Bra86.c -- Branch converter for X86 code (BCJ) +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bra.h" +#include "CpuArch.h" + + +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name + +#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) + +#ifdef MY_CPU_LE_UNALIGN + #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; + #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) +#else + #define BR86_PREPARE_BCJ_SCAN + // bad for MSVC X86 (partial write to byte reg): + #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8) + // bad for old MSVC (partial write to byte reg): + // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) +#endif + +static +Z7_FORCE_INLINE +Z7_ATTRIB_NO_VECTOR +Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) +{ + if (size < 5) + return p; + { + // Byte *p = data; + const Byte *lim = p + size - 4; + unsigned mask = (unsigned)*state; // & 7; +#ifdef BR_CONV_USE_OPT_PC_PTR + /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), + because call/jump offset is relative to the next instruction. + if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), + because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. 
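+       Added note: in both variants BR_PC_GET evaluates to the same value:
+       the original (pc) + 4 + (the number of bytes between the start of the
+       buffer and the current (p)).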
+ */ + pc += 4; +#endif + BR_PC_INIT + goto start; + + for (;; mask |= 4) + { + // cont: mask |= 4; + start: + if (p >= lim) + goto fin; + { + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + } + goto main_loop; + + m0: p--; + m1: p--; + m2: p--; + if (mask == 0) + goto a3; + if (p > lim) + goto fin_p; + + // if (((0x17u >> mask) & 1) == 0) + if (mask > 4 || mask == 3) + { + mask >>= 1; + continue; // goto cont; + } + mask >>= 1; + if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) + continue; // goto cont; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + { + mask <<= 3; + if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) + { + v ^= (((UInt32)0x100 << mask) - 1); + #ifdef MY_CPU_X86 + // for X86 : we can recalculate (c) to reduce register pressure + c = BR_PC_GET; + #endif + BR_CONVERT_VAL(v, c) + } + mask = 0; + } + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; + } + + main_loop: + if (p >= lim) + goto fin; + for (;;) + { + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto a0; } + if (BR86_IS_BCJ_BYTE(1)) { goto a1; } + if (BR86_IS_BCJ_BYTE(2)) { goto a2; } + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + if (p >= lim) + goto fin; + } + + a0: p--; + a1: p--; + a2: p--; + a3: + if (p > lim) + goto fin_p; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; + } + } + +fin_p: + p--; +fin: + // the following processing for tail is optional and can be commented + /* + lim += 4; + for (; p < lim; p++, mask >>= 1) + if ((*p & 0xfe) == 0xe8) + break; + */ + *state = (UInt32)mask; + return p; + } +} + + +#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ + { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } + +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) +#ifndef Z7_EXTRACT_ONLY +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) +#endif diff --git a/src/core/fex/7z_C/Compiler.h b/src/core/fex/7z_C/Compiler.h new file mode 100644 index 00000000..6f8db4ca --- /dev/null +++ b/src/core/fex/7z_C/Compiler.h @@ -0,0 +1,236 @@ +/* Compiler.h : Compiler specific defines and pragmas +2024-01-22 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_COMPILER_H +#define ZIP7_INC_COMPILER_H + +#if defined(__clang__) +# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif +#if defined(__clang__) && defined(__apple_build_version__) +# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__clang__) +# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__GNUC__) +# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#ifdef _MSC_VER +#if !defined(__clang__) && !defined(__GNUC__) +#define Z7_MSC_VER_ORIGINAL _MSC_VER +#endif +#endif + +#if defined(__MINGW32__) || 
defined(__MINGW64__) +#define Z7_MINGW +#endif + +#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__)) +#define Z7_MCST_LCC +#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__) +#endif + +/* +#if defined(__AVX2__) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \ + || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \ + || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) + #define Z7_COMPILER_AVX2_SUPPORTED + #endif +#endif +*/ + +// #pragma GCC diagnostic ignored "-Wunknown-pragmas" + +#ifdef __clang__ +// padding size of '' with 4 bytes to alignment boundary +#pragma GCC diagnostic ignored "-Wpadded" + +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \ + && defined(__FreeBSD__) +// freebsd: +#pragma GCC diagnostic ignored "-Wexcess-padding" +#endif + +#if __clang_major__ >= 16 +#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" +#endif + +#if __clang_major__ == 13 +#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// cheri +#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast" +#endif +#endif + +#if __clang_major__ == 13 + // for + #pragma GCC diagnostic ignored "-Wreserved-identifier" +#endif + +#endif // __clang__ + +#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16 +// #pragma GCC diagnostic ignored "-Wcast-function-type-strict" +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION +#endif + +typedef void (*Z7_void_Function)(void); +#if defined(__clang__) || defined(__GNUC__) +#define Z7_CAST_FUNC_C (Z7_void_Function) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define Z7_CAST_FUNC_C (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define Z7_CAST_FUNC_C +#endif +/* +#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) + // #pragma GCC diagnostic ignored "-Wcast-function-type" +#endif +*/ +#ifdef __GNUC__ +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif +#endif + + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + +#if defined(_MSC_VER) && _MSC_VER >= 1800 +#pragma warning(disable : 4464) // relative include path contains '..' 
+#endif + +// == 1200 : -O1 : for __forceinline +// >= 1900 : -O1 : for printf +#pragma warning(disable : 4710) // function not inlined + +#if _MSC_VER < 1900 +// winnt.h: 'Int64ShllMod32' +#pragma warning(disable : 4514) // unreferenced inline function has been removed +#endif + +#if _MSC_VER < 1300 +// #pragma warning(disable : 4702) // unreachable code +// Bra.c : -O1: +#pragma warning(disable : 4714) // function marked as __forceinline not inlined +#endif + +/* +#if _MSC_VER > 1400 && _MSC_VER <= 1900 +// strcat: This function or variable may be unsafe +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif +*/ + +#if _MSC_VER > 1200 +// -Wall warnings + +#pragma warning(disable : 4711) // function selected for automatic inline expansion +#pragma warning(disable : 4820) // '2' bytes padding added after data member + +#if _MSC_VER >= 1400 && _MSC_VER < 1920 +// 1400: string.h: _DBG_MEMCPY_INLINE_ +// 1600 - 191x : smmintrin.h __cplusplus' +// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' +#pragma warning(disable : 4668) + +// 1400 - 1600 : WinDef.h : 'FARPROC' : +// 1900 - 191x : immintrin.h: _readfsbase_u32 +// no function prototype given : converting '()' to '(void)' +#pragma warning(disable : 4255) +#endif + +#if _MSC_VER >= 1914 +// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#pragma warning(disable : 5045) +#endif + +#endif // _MSC_VER > 1200 +#endif // _MSC_VER + + +#if defined(__clang__) && (__clang_major__ >= 4) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("clang loop unroll(disable)") \ + _Pragma("clang loop vectorize(disable)") + #define Z7_ATTRIB_NO_VECTORIZE +#elif defined(__GNUC__) && (__GNUC__ >= 5) \ + && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610)) + #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) + // __attribute__((optimize("no-unroll-loops"))); + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE +#elif defined(_MSC_VER) && (_MSC_VER >= 1920) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("loop( no_vector )") + #define Z7_ATTRIB_NO_VECTORIZE +#else + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + #define Z7_ATTRIB_NO_VECTORIZE +#endif + +#if defined(MY_CPU_X86_OR_AMD64) && ( \ + defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5)) + #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse"))) +#else + #define Z7_ATTRIB_NO_SSE +#endif + +#define Z7_ATTRIB_NO_VECTOR \ + Z7_ATTRIB_NO_VECTORIZE \ + Z7_ATTRIB_NO_SSE + + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // GCC is not good for __builtin_expect() + #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_unlikely [[unlikely]] + // #define Z7_likely [[likely]] +#else + #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600)) + +#if (Z7_CLANG_VERSION < 130000) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") +#endif + +#define 
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/src/core/fex/7z_C/CpuArch.c b/src/core/fex/7z_C/CpuArch.c index 185c04c6..2560f908 100644 --- a/src/core/fex/7z_C/CpuArch.c +++ b/src/core/fex/7z_C/CpuArch.c @@ -1,223 +1,970 @@ -/* CpuArch.c -- CPU specific code -2010-10-26: Igor Pavlov : Public domain */ - -#include "CpuArch.h" - -#ifdef MY_CPU_X86_OR_AMD64 - -#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) || defined(__clang__) -#define USE_ASM -#endif - -#if defined(USE_ASM) && !defined(MY_CPU_AMD64) -static UInt32 CheckFlag(UInt32 flag) -{ -#ifdef _MSC_VER - __asm pushfd; - __asm pop EAX; - __asm mov EDX, EAX; - __asm xor EAX, flag; - __asm push EAX; - __asm popfd; - __asm pushfd; - __asm pop EAX; - __asm xor EAX, EDX; - __asm push EDX; - __asm popfd; - __asm and flag, EAX; -#else - __asm__ __volatile__( - "pushf\n\t" - "pop %%EAX\n\t" - "movl %%EAX,%%EDX\n\t" - "xorl %0,%%EAX\n\t" - "push %%EAX\n\t" - "popf\n\t" - "pushf\n\t" - "pop %%EAX\n\t" - "xorl %%EDX,%%EAX\n\t" - "push %%EDX\n\t" - "popf\n\t" - "andl %%EAX, %0\n\t" - : "=c"(flag) - : "c"(flag)); -#endif - return flag; -} -#define CHECK_CPUID_IS_SUPPORTED \ - if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) \ - return False; -#else -#define CHECK_CPUID_IS_SUPPORTED -#endif - -static void MyCPUID(UInt32 function, UInt32* a, UInt32* b, UInt32* c, UInt32* d) -{ -#ifdef USE_ASM - -#ifdef _MSC_VER - - UInt32 a2, b2, c2, d2; - __asm xor EBX, EBX; - __asm xor ECX, ECX; - __asm xor EDX, EDX; - __asm mov EAX, function; - __asm cpuid; - __asm mov a2, EAX; - __asm mov b2, EBX; - __asm mov c2, ECX; - __asm mov d2, EDX; - - *a = a2; - *b = b2; - *c = c2; - *d = d2; - -#else - - __asm__ __volatile__( - "cpuid" - : "=a"(*a), - "=b"(*b), - "=c"(*c), - "=d"(*d) - : "0"(function)); - -#endif - -#else - - int CPUInfo[4]; - __cpuid(CPUInfo, function); - *a = CPUInfo[0]; - *b = CPUInfo[1]; - *c = CPUInfo[2]; - *d = CPUInfo[3]; - -#endif -} - -Bool x86cpuid_CheckAndRead(Cx86cpuid* p) -{ - CHECK_CPUID_IS_SUPPORTED - MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); - MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); - return True; -} - -static UInt32 kVendors[][3] = { - { 0x756E6547, 0x49656E69, 0x6C65746E }, - { 0x68747541, 0x69746E65, 0x444D4163 }, - { 0x746E6543, 0x48727561, 0x736C7561 } -}; - -int x86cpuid_GetFirm(const Cx86cpuid* p) -{ - unsigned i; - for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) { - const UInt32* v = kVendors[i]; - if (v[0] == p->vendor[0] && v[1] == p->vendor[1] && v[2] == p->vendor[2]) - return (int)i; - } - return -1; -} - -Bool CPU_Is_InOrder() -{ - Cx86cpuid p; - int firm; - UInt32 family, model; - if (!x86cpuid_CheckAndRead(&p)) - return True; - family = x86cpuid_GetFamily(&p); - model = x86cpuid_GetModel(&p); - firm = x86cpuid_GetFirm(&p); - switch (firm) { - case CPU_FIRM_INTEL: - return (family < 6 || (family == 6 && model == 0x100C)); - case CPU_FIRM_AMD: - return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); - case CPU_FIRM_VIA: - return (family < 6 || (family == 6 && model < 0xF)); - } - return True; -} - -#if !defined(MY_CPU_AMD64) && defined(_WIN32) -#include -#include -#include - -static int GetOSMajorVersion() -{ - WCHAR path[2048] = {}; - if (!GetSystemDirectoryW(path, 2048)) - 
return 0; - -#if __STDC_WANT_SECURE_LIB__ - wcscat_s(path, 2048, L"\\kernel32.dll" ); -#else - wcscat(path, L"\\kernel32.dll"); -#endif - - // - // Based on example code from this article - // http://support.microsoft.com/kb/167597 - // - - DWORD handle; -#if (_WIN32_WINNT >= _WIN32_WINNT_VISTA) - DWORD len = GetFileVersionInfoSizeExW(FILE_VER_GET_NEUTRAL, path, &handle); -#else - DWORD len = GetFileVersionInfoSizeW(path, &handle); -#endif - if (!len) - return 0; - - uint8_t *buff = (uint8_t *)malloc(len + 1); - if (!buff) - return 0; - -#if (_WIN32_WINNT >= _WIN32_WINNT_VISTA) - if (!GetFileVersionInfoExW(FILE_VER_GET_NEUTRAL, path, 0, len, buff)) -#else - if (!GetFileVersionInfoW(path, 0, len, buff)) -#endif - return 0; - - VS_FIXEDFILEINFO *vInfo = NULL; - UINT infoSize; - - if (!VerQueryValueW(buff, L"\\", (LPVOID*)(&vInfo), &infoSize)) - return 0; - - if (buff) - free(buff); - - if (!infoSize) - return 0; - - return HIWORD(vInfo->dwFileVersionMS); -} - -static Bool CPU_Sys_Is_SSE_Supported() -{ - return (GetOSMajorVersion() >= 5); -} -#define CHECK_SYS_SSE_SUPPORT \ - if (!CPU_Sys_Is_SSE_Supported()) \ - return False; -#else -#define CHECK_SYS_SSE_SUPPORT -#endif - -Bool CPU_Is_Aes_Supported() -{ - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT - if (!x86cpuid_CheckAndRead(&p)) - return False; - return (p.c >> 25) & 1; -} - -#endif +/* CpuArch.c -- CPU specific code +Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// #include + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + +#undef NEED_CHECK_FOR_CPUID +#if !defined(MY_CPU_AMD64) +#define NEED_CHECK_FOR_CPUID +#endif + +/* + cpuid instruction supports (subFunction) parameter in ECX, + that is used only with some specific (function) parameter values. + most functions use only (subFunction==0). +*/ +/* + __cpuid(): MSVC and GCC/CLANG use same function/macro name + but parameters are different. + We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function. +*/ + +#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \ + || defined(__clang__) /* && (__clang_major__ >= 10) */ + +/* there was some CLANG/GCC compilers that have issues with + rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined). + compiler's contains the macro __cpuid() that is similar to our code. + The history of __cpuid() changes in CLANG/GCC: + GCC: + 2007: it preserved ebx for (__PIC__ && __i386__) + 2013: it preserved rbx and ebx for __PIC__ + 2014: it doesn't preserves rbx and ebx anymore + we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem. + CLANG: + 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check. + Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)? + Do we need __PIC__ test for CLANG or we must care about rbx even if + __PIC__ is not defined? +*/ + +#define ASM_LN "\n" + +#if defined(MY_CPU_AMD64) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). 
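+
+  General caller-side sketch for the z7_x86_cpuid() wrapper defined below
+  (illustrative only, not from the original sources):
+    UInt32 p[4];
+    z7_x86_cpuid(p, 0);
+    // p[0] = maximum supported function;
+    // p[1], p[3], p[2] = the 12-byte vendor string ("Genu", "ineI", "ntel" on Intel)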
*/ + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%rbx, %q1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%rbx, %q1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#elif defined(MY_CPU_X86) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%ebx, %k1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%ebx, %k1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#else + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "cpuid" \ + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#endif + +#define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0) + +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + x86_cpuid_MACRO(p, func) +} + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + x86_cpuid_MACRO_2(p, func, subFunc) +} + + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + UInt32 a; + __asm__ __volatile__ ( + ASM_LN "pushf" + ASM_LN "pushf" + ASM_LN "pop %0" + // ASM_LN "movl %0, %1" + // ASM_LN "xorl $0x200000, %0" + ASM_LN "btc %1, %0" + ASM_LN "push %0" + ASM_LN "popf" + ASM_LN "pushf" + ASM_LN "pop %0" + ASM_LN "xorl (%%esp), %0" + + ASM_LN "popf" + ASM_LN + : "=&r" (a) // "=a" + : "i" (EFALGS_CPUID_BIT) + ); + if ((a & (1 << EFALGS_CPUID_BIT)) == 0) + return 0; + #endif + { + UInt32 p[4]; + x86_cpuid_MACRO(p, 0) + return p[0]; + } +} + +#undef ASM_LN + +#elif !defined(_MSC_VER) + +/* +// for gcc/clang and other: we can try to use __cpuid macro: +#include +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + __cpuid(func, p[0], p[1], p[2], p[3]); +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return (UInt32)__get_cpuid_max(0, NULL); +} +*/ +// for unsupported cpuid: +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(func) + p[0] = p[1] = p[2] = p[3] = 0; +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return 0; +} + +#else // _MSC_VER + +#if !defined(MY_CPU_AMD64) + +UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + __asm pushfd + __asm pushfd + /* + __asm pop eax + // __asm mov edx, eax + __asm btc eax, EFALGS_CPUID_BIT + __asm push eax + */ + __asm btc dword ptr [esp], EFALGS_CPUID_BIT + __asm popfd + __asm pushfd + __asm pop eax + // __asm xor eax, edx + __asm xor eax, [esp] + // __asm push edx + __asm popfd + __asm and eax, (1 shl EFALGS_CPUID_BIT) + __asm jz end_func + #endif + __asm push ebx + __asm xor eax, eax // func + __asm xor ecx, ecx // subFunction (optional) for (func == 0) + __asm cpuid + __asm pop ebx + #if defined(NEED_CHECK_FOR_CPUID) + end_func: + #endif + __asm ret 0 +} + +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm xor ecx, ecx // subfunction (optional) for (func == 0) + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 0 +} + +static +void __declspec(naked) Z7_FASTCALL 
z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + UNUSED_VAR(subFunc) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm mov ecx, [esp + 12] // subFunc + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 4 +} + +#else // MY_CPU_AMD64 + + #if _MSC_VER >= 1600 + #include + #define MY_cpuidex __cpuidex + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + __cpuidex((int *)p, func, subFunc); +} + + #else +/* + __cpuid (func == (0 or 7)) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) x64 doesn't clear ECX + We still can use __cpuid for low (func) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FASTCALL / NO_INLINE func. + So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + +DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! +*/ +static +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo) +{ + UNUSED_VAR(subFunction) + __cpuid(CPUInfo, func); +} + #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) + #pragma message("======== MY_cpuidex_HACK WAS USED ========") +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + MY_cpuidex_HACK(subFunc, func, (Int32 *)p); +} + #endif // _MSC_VER >= 1600 + +#if !defined(MY_CPU_AMD64) +/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, + so we disable inlining here */ +Z7_NO_INLINE +#endif +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + MY_cpuidex((Int32 *)p, (Int32)func, 0); +} + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + Int32 a[4]; + MY_cpuidex(a, 0, 0); + return a[0]; +} + +#endif // MY_CPU_AMD64 +#endif // _MSC_VER + +#if defined(NEED_CHECK_FOR_CPUID) +#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; } +#else +#define CHECK_CPUID_IS_SUPPORTED +#endif +#undef NEED_CHECK_FOR_CPUID + + +static +BoolInt x86cpuid_Func_1(UInt32 *p) +{ + CHECK_CPUID_IS_SUPPORTED + z7_x86_cpuid(p, 1); + return True; +} + +/* +static const UInt32 kVendors[][1] = +{ + { 0x756E6547 }, // , 0x49656E69, 0x6C65746E }, + { 0x68747541 }, // , 0x69746E65, 0x444D4163 }, + { 0x746E6543 } // , 0x48727561, 0x736C7561 } +}; +*/ + +/* +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; +int x86cpuid_GetFirm(const Cx86cpuid *p); +#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf)) +#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf)) +#define x86cpuid_ver_GetStepping(ver) (ver & 0xf) + +int x86cpuid_GetFirm(const Cx86cpuid *p) +{ + unsigned i; + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++) + { + const UInt32 *v = kVendors[i]; + if (v[0] == p->vendor[0] + // && v[1] == p->vendor[1] + 
// && v[2] == p->vendor[2] + ) + return (int)i; + } + return -1; +} + +BoolInt CPU_Is_InOrder() +{ + Cx86cpuid p; + UInt32 family, model; + if (!x86cpuid_CheckAndRead(&p)) + return True; + + family = x86cpuid_ver_GetFamily(p.ver); + model = x86cpuid_ver_GetModel(p.ver); + + switch (x86cpuid_GetFirm(&p)) + { + case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( + // In-Order Atom CPU + model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 + || model == 0x26 // 45 nm, Z6xx + || model == 0x27 // 32 nm, Z2460 + || model == 0x35 // 32 nm, Z2760 + || model == 0x36 // 32 nm, N2xxx, D2xxx + ))); + case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); + case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); + } + return False; // v23 : unknown processors are not In-Order +} +*/ + +#ifdef _WIN32 +#include "7zWindows.h" +#endif + +#if !defined(MY_CPU_AMD64) && defined(_WIN32) + +/* for legacy SSE ia32: there is no user-space cpu instruction to check + that OS supports SSE register storing/restoring on context switches. + So we need some OS-specific function to check that it's safe to use SSE registers. +*/ + +Z7_FORCE_INLINE +static BoolInt CPU_Sys_Is_SSE_Supported(void) +{ +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4996) // `GetVersion': was declared deprecated +#endif + /* low byte is major version of Windows + We suppose that any Windows version since + Windows2000 (major == 5) supports SSE registers */ + return (Byte)GetVersion() >= 5; +#if defined(_MSC_VER) + #pragma warning(pop) +#endif +} +#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; +#else +#define CHECK_SYS_SSE_SUPPORT +#endif + + +#if !defined(MY_CPU_AMD64) + +BoolInt CPU_IsSupported_CMOV(void) +{ + UInt32 a[4]; + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 15) & 1; +} + +BoolInt CPU_IsSupported_SSE(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSE2(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 26) & 1; +} + +#endif + + +static UInt32 x86cpuid_Func_1_ECX(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return a[2]; +} + +BoolInt CPU_IsSupported_AES(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA(void) +{ + CHECK_SYS_SSE_SUPPORT + + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + return (BoolInt)(d[1] >> 29) & 1; + } +} + + +BoolInt CPU_IsSupported_SHA512(void) +{ + if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here + + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid_subFunc(d, 7, 0); + if (d[0] < 1) // d[0] - is max supported subleaf value + return False; + z7_x86_cpuid_subFunc(d, 7, 1); + return (BoolInt)(d[0]) & 1; + } +} + +/* +MSVC: _xgetbv() intrinsic is available since VS2010SP1. + MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in + that we can use or check. + For any 32-bit x86 we can use asm code in MSVC, + but MSVC asm code is huge after compilation. 
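
A usage sketch for the CPU_IsSupported_* helpers defined above (editor's addition; the helper name and the caching scheme are illustrative, not from 7-Zip): callers normally probe a feature once and cache the answer instead of re-running cpuid on every call.

static int g_HasShaExt = -1;  // -1 = not probed yet

static int HasShaExt_sketch(void)
{
  if (g_HasShaExt < 0)
    g_HasShaExt = (int)CPU_IsSupported_SHA();  // CPUID.(EAX=7,ECX=0):EBX bit 29, see above
  return g_HasShaExt;
}
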
+ So _xgetbv() is better + +ICC: _xgetbv() intrinsic is available (in what version of ICC?) + ICC defines (__GNUC___) and it supports gnu assembler + also ICC supports MASM style code with -use-msasm switch. + but ICC doesn't support __attribute__((__target__)) + +GCC/CLANG 9: + _xgetbv() is macro that works via __builtin_ia32_xgetbv() + and we need __attribute__((__target__("xsave")). + But with __target__("xsave") the function will be not + inlined to function that has no __target__("xsave") attribute. + If we want _xgetbv() call inlining, then we should use asm version + instead of calling _xgetbv(). + Note:intrinsic is broke before GCC 8.2: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684 +*/ + +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \ + || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \ + || defined(__GNUC__) && (__GNUC__ >= 9) \ + || defined(__clang__) && (__clang_major__ >= 9) +// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler +#if defined(__INTEL_COMPILER) +#define ATTRIB_XGETBV +#elif defined(__GNUC__) || defined(__clang__) +// we don't define ATTRIB_XGETBV here, because asm version is better for inlining. +// #define ATTRIB_XGETBV __attribute__((__target__("xsave"))) +#else +#define ATTRIB_XGETBV +#endif +#endif + +#if defined(ATTRIB_XGETBV) +#include +#endif + + +// XFEATURE_ENABLED_MASK/XCR0 +#define MY_XCR_XFEATURE_ENABLED_MASK 0 + +#if defined(ATTRIB_XGETBV) +ATTRIB_XGETBV +#endif +static UInt64 x86_xgetbv_0(UInt32 num) +{ +#if defined(ATTRIB_XGETBV) + { + return + #if (defined(_MSC_VER)) + _xgetbv(num); + #else + __builtin_ia32_xgetbv( + #if !defined(__clang__) + (int) + #endif + num); + #endif + } + +#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC) + + UInt32 a, d; + #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) + __asm__ + ( + "xgetbv" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #else // is old gcc + __asm__ + ( + ".byte 0x0f, 0x01, 0xd0" "\n\t" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #endif + return ((UInt64)d << 32) | a; + // return a; + +#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64) + + UInt32 a, d; + __asm { + push eax + push edx + push ecx + mov ecx, num; + // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK + _emit 0x0f + _emit 0x01 + _emit 0xd0 + mov a, eax + mov d, edx + pop ecx + pop edx + pop eax + } + return ((UInt64)d << 32) | a; + // return a; + +#else // it's unknown compiler + // #error "Need xgetbv function" + UNUSED_VAR(num) + // for MSVC-X64 we could call external function from external file. + /* Actually we had checked OSXSAVE/AVX in cpuid before. + So it's expected that OS supports at least AVX and below. */ + // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0 + return + // (1 << 0) | // x87 + (1 << 1) // SSE + | (1 << 2); // AVX + +#endif +} + +#ifdef _WIN32 +/* + Windows versions do not know about new ISA extensions that + can be introduced. 
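
A sketch of the intrinsic form of the same XCR0 read done by x86_xgetbv_0() above (editor's addition, not part of the patch): it assumes, per the note above, that MSVC 2010 SP1 or later provides _xgetbv() and the _XCR_XFEATURE_ENABLED_MASK macro in <immintrin.h>; the wrapper name is hypothetical.

#if defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h>
static UInt64 ReadXcr0_sketch(void)
{
  return _xgetbv(_XCR_XFEATURE_ENABLED_MASK);  // _XCR_XFEATURE_ENABLED_MASK is 0 (XCR0)
}
#endif
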
But we still can use new extensions,
+  even if Windows doesn't report about supporting them,
+  But we can use new extensions, only if Windows knows about new ISA extension
+  that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
+  So it's enough to check
+    MY_PF_AVX_INSTRUCTIONS_AVAILABLE
+      instead of
+    MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
+*/
+#define MY_PF_XSAVE_ENABLED 17
+// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
+// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
+// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
+// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
+// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
+// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
+#endif
+
+BoolInt CPU_IsSupported_AVX(void)
+{
+  #ifdef _WIN32
+  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
+    return False;
+  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
+     some latest Win10 revisions. But we need AVX in older Windows also.
+     So we don't use the following check: */
+  /*
+  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
+    return False;
+  */
+  #endif
+
+  /*
+    OS must use new special XSAVE/XRSTOR instructions to save
+    AVX registers when it required for context switching.
+    At OS statring:
+    OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
+    Also OS sets bitmask in XCR0 register that defines what
+    registers will be processed by XSAVE instruction:
+      XCR0.SSE[bit 0] - x87 registers and state
+      XCR0.SSE[bit 1] - SSE registers and state
+      XCR0.AVX[bit 2] - AVX registers and state
+    CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
+    So we can read that bit in user-space.
+    XCR0 is available for reading in user-space by new XGETBV instruction.
+  */
+  {
+    const UInt32 c = x86cpuid_Func_1_ECX();
+    if (0 == (1
+        & (c >> 28)   // AVX instructions are supported by hardware
+        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
+      return False;
+  }
+
+  /* also we can check
+     CPUID.1:ECX.XSAVE [bit 26] : that shows that
+     XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
+     But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
+
+  /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
+     in most cases we expect that OS also will support storing/restoring
+     for AVX and SSE states at least.
+     But to be ensure for that we call user-space instruction
+     XGETBV(0) to get XCR0 value that contains bitmask that defines
+     what exact states(registers) OS have enabled for storing/restoring.
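
A small sketch of the XCR0 bits that the code below tests (editor's addition; the helper name is hypothetical): bit 1 is the SSE/XMM state, bit 2 the AVX/YMM state, and bits 5..7 are the opmask/ZMM states used by the AVX-512 check further down.

static BoolInt OsSavesAvxState_sketch(void)
{
  const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
  return (BoolInt)((bm >> 1) & (bm >> 2) & 1);  // both SSE and AVX state enabled by the OS
}
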
+ */ + + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring + } + // since Win7SP1: we can use GetEnabledXStateFeatures(); +} + + +BoolInt CPU_IsSupported_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (BoolInt)(d[1] >> 5); // avx2 + } +} + +#if 0 +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + BoolInt v; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + v = 1 + & (BoolInt)(d[1] >> 16) // avx512f + & (BoolInt)(d[1] >> 31); // avx512vl + if (!v) + return False; + } + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 5) // OPMASK + & (BoolInt)(bm >> 6) // ZMM upper 256-bit + & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 + } +} +#endif + +BoolInt CPU_IsSupported_VAES_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (BoolInt)(d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX + } +} + +BoolInt CPU_IsSupported_PageGB(void) +{ + CHECK_CPUID_IS_SUPPORTED + { + UInt32 d[4]; + z7_x86_cpuid(d, 0x80000000); + if (d[0] < 0x80000001) + return False; + z7_x86_cpuid(d, 0x80000001); + return (BoolInt)(d[3] >> 26) & 1; + } +} + + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include "7zWindows.h" + +BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 
1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ +/* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); +*/ + +static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name) +{ + UInt32 val = 0; + if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + +BoolInt CPU_IsSupported_CRC32(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); +} + +BoolInt CPU_IsSupported_SHA512(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512"); +} + +/* +BoolInt CPU_IsSupported_SHA3(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3"); +} +*/ + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) + #define Z7_GETAUXV_AVAILABLE +#else +// #pragma message("=== is not NEW GLIBC === ") + #if defined __has_include + #if __has_include () +// #pragma message("=== sys/auxv.h is avail=== ") + #define Z7_GETAUXV_AVAILABLE + #endif + #endif +#endif + +#ifdef Z7_GETAUXV_AVAILABLE +// #pragma message("=== Z7_GETAUXV_AVAILABLE === ") +#include +#define USE_HWCAP +#endif + +#ifdef USE_HWCAP + +#if defined(__FreeBSD__) +static unsigned long MY_getauxval(int aux) +{ + unsigned long val; + if (elf_aux_info(aux, &val, sizeof(val))) + return 0; + return val; +} +#else +#define MY_getauxval getauxval + #if defined __has_include + #if __has_include () +#include + #endif + #endif +#endif + + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); } + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + MY_HWCAP_CHECK_FUNC_2(name, name) +#if 1 || defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +#endif +// MY_HWCAP_CHECK_FUNC (ASIMD) +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); } + MY_HWCAP_CHECK_FUNC_2(NEON, NEON) +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name(void) { return 0; } +#if defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else + MY_HWCAP_CHECK_FUNC(NEON) +#endif + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) +#ifdef MY_CPU_ARM64 +// supports HWCAP_SHA512 and HWCAP_SHA3 since 2017. 
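
An expansion sketch (editor's addition, not part of the patch): in the USE_HWCAP branch on arm64 Linux, MY_HWCAP_CHECK_FUNC(AES) above generates essentially the following, with AT_HWCAP and HWCAP_AES coming from <sys/auxv.h> / <asm/hwcap.h>; the name is changed and the result normalized to 0/1 here.

static BoolInt HasArmAes_sketch(void)
{
  return (MY_getauxval(AT_HWCAP) & HWCAP_AES) != 0;
}
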
+// we define them here, if they are not defined +#ifndef HWCAP_SHA3 +// #define HWCAP_SHA3 (1 << 17) +#endif +#ifndef HWCAP_SHA512 +// #pragma message("=== HWCAP_SHA512 define === ") +#define HWCAP_SHA512 (1 << 21) +#endif +MY_HWCAP_CHECK_FUNC (SHA512) +// MY_HWCAP_CHECK_FUNC (SHA3) +#endif + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + const int res = z7_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + +#endif diff --git a/src/core/fex/7z_C/CpuArch.h b/src/core/fex/7z_C/CpuArch.h index 0a853ae9..699aab4b 100644 --- a/src/core/fex/7z_C/CpuArch.h +++ b/src/core/fex/7z_C/CpuArch.h @@ -1,159 +1,678 @@ -/* CpuArch.h -- CPU specific code -2010-12-01: Igor Pavlov : Public domain */ - -#ifndef __CPU_ARCH_H -#define __CPU_ARCH_H - -#include "Types.h" - -EXTERN_C_BEGIN - -/* -MY_CPU_LE means that CPU is LITTLE ENDIAN. -If MY_CPU_LE is not defined, we don't know about that property of platform (it can be LITTLE ENDIAN). - -MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. -If MY_CPU_LE_UNALIGN is not defined, we don't know about these properties of platform. -*/ - -#if defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) -#define MY_CPU_AMD64 -#endif - -#if defined(MY_CPU_AMD64) || defined(_M_IA64) -#define MY_CPU_64BIT -#endif - -#if defined(_M_IX86) || defined(__i386__) -#define MY_CPU_X86 -#endif - -#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) -#define MY_CPU_X86_OR_AMD64 -#endif - -#if defined(MY_CPU_X86) || defined(_M_ARM) -#define MY_CPU_32BIT -#endif - -#if defined(_WIN32) && defined(_M_ARM) -#define MY_CPU_ARM_LE -#endif - -#if defined(_WIN32) && defined(_M_IA64) -#define MY_CPU_IA64_LE -#endif - -#if defined(MY_CPU_X86_OR_AMD64) -#define MY_CPU_LE_UNALIGN -#endif - -#if defined(MY_CPU_X86_OR_AMD64) || defined(MY_CPU_ARM_LE) || defined(MY_CPU_IA64_LE) || defined(__ARMEL__) || defined(__MIPSEL__) || defined(__LITTLE_ENDIAN__) -#define MY_CPU_LE -#endif - -#if defined(__BIG_ENDIAN__) || defined(__m68k__) || defined(__ARMEB__) || defined(__MIPSEB__) -#define MY_CPU_BE -#endif - -#if defined(MY_CPU_LE) && defined(MY_CPU_BE) -Stop_Compiling_Bad_Endian -#endif - -#ifdef MY_CPU_LE_UNALIGN - -#define GetUi16(p) (*(const UInt16*)(p)) -#define GetUi32(p) (*(const UInt32*)(p)) -#define GetUi64(p) (*(const UInt64*)(p)) -#define SetUi16(p, d) *(UInt16*)(p) = (d); -#define SetUi32(p, d) *(UInt32*)(p) = (d); -#define SetUi64(p, d) *(UInt64*)(p) = (d); - -#else - -#define GetUi16(p) (((const Byte*)(p))[0] | ((UInt16)((const Byte*)(p))[1] << 8)) - -#define GetUi32(p) ( \ - ((const Byte*)(p))[0] | ((UInt32)((const Byte*)(p))[1] << 8) | ((UInt32)((const Byte*)(p))[2] << 16) | ((UInt32)((const Byte*)(p))[3] << 24)) - -#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte*)(p)) + 4) << 32)) - -#define SetUi16(p, d) \ - { \ - UInt32 _x_ = (d); \ - ((Byte*)(p))[0] = (Byte)_x_; \ - ((Byte*)(p))[1] = (Byte)(_x_ >> 8); \ - } - -#define SetUi32(p, d) \ - { \ - UInt32 _x_ = (d); \ - ((Byte*)(p))[0] = (Byte)_x_; \ - ((Byte*)(p))[1] = (Byte)(_x_ >> 8); \ - ((Byte*)(p))[2] = (Byte)(_x_ >> 16); \ - ((Byte*)(p))[3] = (Byte)(_x_ >> 24); \ - } - -#define SetUi64(p, d) \ - { \ - UInt64 _x64_ = (d); \ - 
SetUi32(p, (UInt32)_x64_); \ - SetUi32(((Byte*)(p)) + 4, (UInt32)(_x64_ >> 32)); \ - } - -#endif - -#if defined(MY_CPU_LE_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300) - -#ifndef _M_X64 -#pragma intrinsic(_byteswap_ulong) -#pragma intrinsic(_byteswap_uint64) -#endif - -#define GetBe32(p) _byteswap_ulong(*(const UInt32*)(const Byte*)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64*)(const Byte*)(p)) - -#else - -#define GetBe32(p) ( \ - ((UInt32)((const Byte*)(p))[0] << 24) | ((UInt32)((const Byte*)(p))[1] << 16) | ((UInt32)((const Byte*)(p))[2] << 8) | ((const Byte*)(p))[3]) - -#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte*)(p)) + 4)) - -#endif - -#define GetBe16(p) (((UInt16)((const Byte*)(p))[0] << 8) | ((const Byte*)(p))[1]) - -#ifdef MY_CPU_X86_OR_AMD64 - - typedef struct -{ - UInt32 maxFunc; - UInt32 vendor[3]; - UInt32 ver; - UInt32 b; - UInt32 c; - UInt32 d; -} Cx86cpuid; - -enum { - CPU_FIRM_INTEL, - CPU_FIRM_AMD, - CPU_FIRM_VIA -}; - -Bool x86cpuid_CheckAndRead(Cx86cpuid* p); -int x86cpuid_GetFirm(const Cx86cpuid* p); - -#define x86cpuid_GetFamily(p) (((p)->ver >> 8) & 0xFF00F) -#define x86cpuid_GetModel(p) (((p)->ver >> 4) & 0xF00F) -#define x86cpuid_GetStepping(p) ((p)->ver & 0xF) - -Bool CPU_Is_InOrder(); -Bool CPU_Is_Aes_Supported(); - -#endif - -EXTERN_C_END - -#endif +/* CpuArch.h -- CPU specific code +Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_CPU_ARCH_H +#define ZIP7_INC_CPU_ARCH_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* +MY_CPU_LE means that CPU is LITTLE ENDIAN. +MY_CPU_BE means that CPU is BIG ENDIAN. +If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. + +MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. + +MY_CPU_64BIT means that processor can work with 64-bit registers. 
+ MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) +*/ + +#if !defined(_M_ARM64EC) +#if defined(_M_X64) \ + || defined(_M_AMD64) \ + || defined(__x86_64__) \ + || defined(__AMD64__) \ + || defined(__amd64__) + #define MY_CPU_AMD64 + #ifdef __ILP32__ + #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#endif +#endif + + +#if defined(_M_IX86) \ + || defined(__i386__) + #define MY_CPU_X86 + #define MY_CPU_NAME "x86" + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_ARM64) \ + || defined(_M_ARM64EC) \ + || defined(__AARCH64EL__) \ + || defined(__AARCH64EB__) \ + || defined(__aarch64__) + #define MY_CPU_ARM64 +#if defined(__ILP32__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "arm64-32" + #define MY_CPU_SIZEOF_POINTER 4 +#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) + #define MY_CPU_NAME "arm64-128" + #define MY_CPU_SIZEOF_POINTER 16 +#else +#if defined(_M_ARM64EC) + #define MY_CPU_NAME "arm64ec" +#else + #define MY_CPU_NAME "arm64" +#endif + #define MY_CPU_SIZEOF_POINTER 8 +#endif + #define MY_CPU_64BIT +#endif + + +#if defined(_M_ARM) \ + || defined(_M_ARM_NT) \ + || defined(_M_ARMT) \ + || defined(__arm__) \ + || defined(__thumb__) \ + || defined(__ARMEL__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEL__) \ + || defined(__THUMBEB__) + #define MY_CPU_ARM + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_ARMT + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_ARM32 + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_IA64) \ + || defined(__ia64__) + #define MY_CPU_IA64 + #define MY_CPU_NAME "ia64" + #define MY_CPU_64BIT +#endif + + +#if defined(__mips64) \ + || defined(__mips64__) \ + || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3)) + #define MY_CPU_NAME "mips64" + #define MY_CPU_64BIT +#elif defined(__mips__) + #define MY_CPU_NAME "mips" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || defined(__PPC__) \ + || defined(_POWER) + +#define MY_CPU_PPC_OR_PPC64 + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) + #ifdef __ILP32__ + #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#else + #define MY_CPU_NAME "ppc" + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif +#endif + + +#if defined(__sparc__) \ + || defined(__sparc) + #define MY_CPU_SPARC + #if defined(__LP64__) \ + || defined(_LP64) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_NAME "sparcv9" + #define MY_CPU_SIZEOF_POINTER 8 + #define MY_CPU_64BIT + #elif defined(__sparc_v9__) \ + || defined(__sparcv9) + #define MY_CPU_64BIT + #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "sparcv9-32" + #else + #define MY_CPU_NAME "sparcv9m" + #endif + #elif defined(__sparc_v8__) \ + || defined(__sparcv8) + #define MY_CPU_NAME "sparcv8" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "sparc" + #endif +#endif + + +#if defined(__riscv) \ + || defined(__riscv__) + #define 
MY_CPU_RISCV + #if __riscv_xlen == 32 + #define MY_CPU_NAME "riscv32" + #elif __riscv_xlen == 64 + #define MY_CPU_NAME "riscv64" + #else + #define MY_CPU_NAME "riscv" + #endif +#endif + + +#if defined(__loongarch__) + #define MY_CPU_LOONGARCH + #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64) + #define MY_CPU_64BIT + #endif + #if defined(__loongarch64) + #define MY_CPU_NAME "loongarch64" + #define MY_CPU_LOONGARCH64 + #else + #define MY_CPU_NAME "loongarch" + #endif +#endif + + +// #undef MY_CPU_NAME +// #undef MY_CPU_SIZEOF_POINTER +// #define __e2k__ +// #define __SIZEOF_POINTER__ 4 +#if defined(__e2k__) + #define MY_CPU_E2K + #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "e2k-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "e2k" + #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #endif + #define MY_CPU_64BIT +#endif + + +#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) +#define MY_CPU_X86_OR_AMD64 +#endif + +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 +#endif + + +#ifdef _WIN32 + + #ifdef MY_CPU_ARM + #define MY_CPU_ARM_LE + #endif + + #ifdef MY_CPU_ARM64 + #define MY_CPU_ARM64_LE + #endif + + #ifdef _M_IA64 + #define MY_CPU_IA64_LE + #endif + +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_LE) \ + || defined(MY_CPU_ARM64_LE) \ + || defined(MY_CPU_IA64_LE) \ + || defined(_LITTLE_ENDIAN) \ + || defined(__LITTLE_ENDIAN__) \ + || defined(__ARMEL__) \ + || defined(__THUMBEL__) \ + || defined(__AARCH64EL__) \ + || defined(__MIPSEL__) \ + || defined(__MIPSEL) \ + || defined(_MIPSEL) \ + || defined(__BFIN__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + #define MY_CPU_LE +#endif + +#if defined(__BIG_ENDIAN__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEB__) \ + || defined(__AARCH64EB__) \ + || defined(__MIPSEB__) \ + || defined(__MIPSEB) \ + || defined(_MIPSEB) \ + || defined(__m68k__) \ + || defined(__s390__) \ + || defined(__s390x__) \ + || defined(__zarch__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define MY_CPU_BE +#endif + + +#if defined(MY_CPU_LE) && defined(MY_CPU_BE) + #error Stop_Compiling_Bad_Endian +#endif + +#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE) + #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time +#endif + +#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) + #error Stop_Compiling_Bad_32_64_BIT +#endif + +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + + +#ifndef MY_CPU_NAME + // #define MY_CPU_IS_UNKNOWN + #ifdef 
MY_CPU_LE + #define MY_CPU_NAME "LE" + #elif defined(MY_CPU_BE) + #define MY_CPU_NAME "BE" + #else + /* + #define MY_CPU_NAME "" + */ + #endif +#endif + + + + + +#ifdef __has_builtin + #define Z7_has_builtin(x) __has_builtin(x) +#else + #define Z7_has_builtin(x) 0 +#endif + + +#define Z7_BSWAP32_CONST(v) \ + ( (((UInt32)(v) << 24) ) \ + | (((UInt32)(v) << 8) & (UInt32)0xff0000) \ + | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \ + | (((UInt32)(v) >> 24) )) + + +#if defined(_MSC_VER) && (_MSC_VER >= 1300) + +#include + +/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */ + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) + +#define Z7_BSWAP16(v) _byteswap_ushort(v) +#define Z7_BSWAP32(v) _byteswap_ulong (v) +#define Z7_BSWAP64(v) _byteswap_uint64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +/* GCC can generate slow code that calls function for __builtin_bswap32() for: + - GCC for RISCV, if Zbb/XTHeadBb extension is not used. + - GCC for SPARC. + The code from CLANG for SPARC also is not fastest. + So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. +*/ +#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ + && !defined(MY_CPU_SPARC) \ + && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \ + ) + +#define Z7_BSWAP16(v) __builtin_bswap16(v) +#define Z7_BSWAP32(v) __builtin_bswap32(v) +#define Z7_BSWAP64(v) __builtin_bswap64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#else + +#define Z7_BSWAP16(v) ((UInt16) \ + ( ((UInt32)(v) << 8) \ + | ((UInt32)(v) >> 8) \ + )) + +#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v) + +#define Z7_BSWAP64(v) \ + ( ( ( (UInt64)(v) ) << 8 * 7 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \ + | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \ + | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \ + | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \ + | ( ( (UInt64)(v) >> 8 * 7 ) ) \ + ) + +#endif + + + +#ifdef MY_CPU_LE + #if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \ + || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6) + #define MY_CPU_LE_UNALIGN + #define MY_CPU_LE_UNALIGN_64 + #elif defined(__ARM_FEATURE_UNALIGNED) +/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions. + Description of problems: +problem-1 : 32-bit ARM architecture: + multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM) + require 32-bit (WORD) alignment (by 32-bit ARM architecture). + So there is "Alignment fault exception", if data is not aligned for 32-bit. + +problem-2 : 32-bit kernels and arm64 kernels: + 32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception". + So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux. + + But some arm64 kernels do not handle these faults in 32-bit programs. + So we have unhandled exception for such instructions. + Probably some new arm64 kernels have fixed it, and unaligned + paired-access instructions work in new kernels? 
+ +problem-3 : compiler for 32-bit arm: + Compilers use LDRD/STRD/LDM/STM for UInt64 accesses + and for another cases where two 32-bit accesses are fused + to one multi-access instruction. + So UInt64 variables must be aligned for 32-bit, and each + 32-bit access must be aligned for 32-bit, if we want to + avoid "Alignment fault" exception (handled or unhandled). + +problem-4 : performace: + Even if unaligned access is handled by kernel, it will be slow. + So if we allow unaligned access, we can get fast unaligned + single-access, and slow unaligned paired-access. + + We don't allow unaligned access on 32-bit arm, because compiler + genarates paired-access instructions that require 32-bit alignment, + and some arm64 kernels have no handler for these instructions. + Also unaligned paired-access instructions will be slow, if kernel handles them. +*/ + // it must be disabled: + // #define MY_CPU_LE_UNALIGN + #endif +#endif + + +#ifdef MY_CPU_LE_UNALIGN + +#define GetUi16(p) (*(const UInt16 *)(const void *)(p)) +#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#ifdef MY_CPU_LE_UNALIGN_64 +#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } +#endif + +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } + +#else + +#define GetUi16(p) ( (UInt16) ( \ + ((const Byte *)(p))[0] | \ + ((UInt16)((const Byte *)(p))[1] << 8) )) + +#define GetUi32(p) ( \ + ((const Byte *)(p))[0] | \ + ((UInt32)((const Byte *)(p))[1] << 8) | \ + ((UInt32)((const Byte *)(p))[2] << 16) | \ + ((UInt32)((const Byte *)(p))[3] << 24)) + +#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); } + +#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); \ + _ppp_[2] = (Byte)(_vvv_ >> 16); \ + _ppp_[3] = (Byte)(_vvv_ >> 24); } + +#endif + + +#ifndef GetUi64 +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) +#endif + +#ifndef SetUi64 +#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ + SetUi32(_ppp2_ , (UInt32)_vvv2_) \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) } +#endif + + +#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) + +#if 0 +// Z7_BSWAP16 can be slow for x86-msvc +#define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p))) +#else +#define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16) +#endif + +#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } + +#if defined(MY_CPU_LE_UNALIGN_64) +#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); } +#endif + +#else + +#define GetBe32(p) ( \ + ((UInt32)((const Byte *)(p))[0] << 24) | \ + ((UInt32)((const Byte *)(p))[1] << 16) | \ + ((UInt32)((const Byte *)(p))[2] << 8) | \ + ((const Byte *)(p))[3] ) + +#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 24); \ + _ppp_[1] = (Byte)(_vvv_ >> 16); \ + _ppp_[2] = (Byte)(_vvv_ >> 8); \ + _ppp_[3] = (Byte)_vvv_; } + +#endif + +#ifndef GetBe64 +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) +#endif + +#ifndef SetBe64 +#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \ + 
_ppp_[0] = (Byte)(_vvv_ >> 56); \ + _ppp_[1] = (Byte)(_vvv_ >> 48); \ + _ppp_[2] = (Byte)(_vvv_ >> 40); \ + _ppp_[3] = (Byte)(_vvv_ >> 32); \ + _ppp_[4] = (Byte)(_vvv_ >> 24); \ + _ppp_[5] = (Byte)(_vvv_ >> 16); \ + _ppp_[6] = (Byte)(_vvv_ >> 8); \ + _ppp_[7] = (Byte)_vvv_; } +#endif + +#ifndef GetBe16 +#ifdef GetBe16_to32 +#define GetBe16(p) ( (UInt16) GetBe16_to32(p)) +#else +#define GetBe16(p) ( (UInt16) ( \ + ((UInt16)((const Byte *)(p))[0] << 8) | \ + ((const Byte *)(p))[1] )) +#endif +#endif + + +#if defined(MY_CPU_BE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_NATIVE_TO_BE_32(v) (v) +#elif defined(MY_CPU_LE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +#else +#error Stop_Compiling_Unknown_Endian_CONV +#endif + + +#if defined(MY_CPU_BE) + +#define GetBe64a(p) (*(const UInt64 *)(const void *)(p)) +#define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetUi64a(p) GetUi64(p) +#define GetUi32a(p) GetUi32(p) +#define GetUi16a(p) GetUi16(p) +#define SetUi32a(p, v) SetUi32(p, v) +#define SetUi16a(p, v) SetUi16(p, v) + +#elif defined(MY_CPU_LE) + +#define GetUi64a(p) (*(const UInt64 *)(const void *)(p)) +#define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetBe64a(p) GetBe64(p) +#define GetBe32a(p) GetBe32(p) +#define GetBe16a(p) GetBe16(p) +#define SetBe32a(p, v) SetBe32(p, v) +#define SetBe16a(p, v) SetBe16(p, v) + +#else +#error Stop_Compiling_Unknown_Endian_CPU_a +#endif + + +#ifndef GetBe16_to32 +#define GetBe16_to32(p) GetBe16(p) +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_OR_ARM64) \ + || defined(MY_CPU_PPC_OR_PPC64) + #define Z7_CPU_FAST_ROTATE_SUPPORTED +#endif + + +#ifdef MY_CPU_X86_OR_AMD64 + +void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function); +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); +#if defined(MY_CPU_AMD64) +#define Z7_IF_X86_CPUID_SUPPORTED +#else +#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc()) +#endif + +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX(void); +BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_CMOV(void); +BoolInt CPU_IsSupported_SSE(void); +BoolInt CPU_IsSupported_SSE2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_SHA512(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) + +BoolInt CPU_IsSupported_CRC32(void); +BoolInt CPU_IsSupported_NEON(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif +BoolInt CPU_IsSupported_SHA512(void); + +#endif + +#if defined(__APPLE__) +int z7_sysctlbyname_Get(const 
char *name, void *buf, size_t *bufSize); +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); +#endif + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Delta.c b/src/core/fex/7z_C/Delta.c new file mode 100644 index 00000000..fc7e9fe9 --- /dev/null +++ b/src/core/fex/7z_C/Delta.c @@ -0,0 +1,169 @@ +/* Delta.c -- Delta converter +2021-02-09 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Delta.h" + +void Delta_Init(Byte *state) +{ + unsigned i; + for (i = 0; i < DELTA_STATE_SIZE; i++) + state[i] = 0; +} + + +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + Byte temp[DELTA_STATE_SIZE]; + + if (size == 0) + return; + + { + unsigned i = 0; + do + temp[i] = state[i]; + while (++i != delta); + } + + if (size <= delta) + { + unsigned i = 0, k; + do + { + Byte b = *data; + *data++ = (Byte)(b - temp[i]); + temp[i] = b; + } + while (++i != size); + + k = 0; + + do + { + if (i == delta) + i = 0; + state[k] = temp[i++]; + } + while (++k != delta); + + return; + } + + { + Byte *p = data + size - delta; + { + unsigned i = 0; + do + state[i] = *p++; + while (++i != delta); + } + { + const Byte *lim = data + delta; + ptrdiff_t dif = -(ptrdiff_t)delta; + + if (((ptrdiff_t)size + dif) & 1) + { + --p; *p = (Byte)(*p - p[dif]); + } + + while (p != lim) + { + --p; *p = (Byte)(*p - p[dif]); + --p; *p = (Byte)(*p - p[dif]); + } + + dif = -dif; + + do + { + --p; *p = (Byte)(*p - temp[--dif]); + } + while (dif != 0); + } + } +} + + +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + unsigned i; + const Byte *lim; + + if (size == 0) + return; + + i = 0; + lim = data + size; + + if (size <= delta) + { + do + *data = (Byte)(*data + state[i++]); + while (++data != lim); + + for (; delta != i; state++, delta--) + *state = state[i]; + data -= i; + } + else + { + /* + #define B(n) b ## n + #define I(n) Byte B(n) = state[n]; + #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); } + #define F(n) if (data != lim) { U(n) } + + if (delta == 1) + { + I(0) + if ((lim - data) & 1) { U(0) } + while (data != lim) { U(0) U(0) } + data -= 1; + } + else if (delta == 2) + { + I(0) I(1) + lim -= 1; while (data < lim) { U(0) U(1) } + lim += 1; F(0) + data -= 2; + } + else if (delta == 3) + { + I(0) I(1) I(2) + lim -= 2; while (data < lim) { U(0) U(1) U(2) } + lim += 2; F(0) F(1) + data -= 3; + } + else if (delta == 4) + { + I(0) I(1) I(2) I(3) + lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) } + lim += 3; F(0) F(1) F(2) + data -= 4; + } + else + */ + { + do + { + *data = (Byte)(*data + state[i++]); + data++; + } + while (i != delta); + + { + ptrdiff_t dif = -(ptrdiff_t)delta; + do + *data = (Byte)(*data + data[dif]); + while (++data != lim); + data += dif; + } + } + } + + do + *state++ = *data; + while (++data != lim); +} diff --git a/src/core/fex/7z_C/Delta.h b/src/core/fex/7z_C/Delta.h new file mode 100644 index 00000000..be77c6c6 --- /dev/null +++ b/src/core/fex/7z_C/Delta.h @@ -0,0 +1,19 @@ +/* Delta.h -- Delta converter +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_DELTA_H +#define ZIP7_INC_DELTA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define DELTA_STATE_SIZE 256 + +void Delta_Init(Byte *state); +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size); +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Lzma2Dec.c b/src/core/fex/7z_C/Lzma2Dec.c index 5666dc1c..64420ef0 100644 --- 
a/src/core/fex/7z_C/Lzma2Dec.c +++ b/src/core/fex/7z_C/Lzma2Dec.c @@ -1,327 +1,493 @@ -/* Lzma2Dec.c -- LZMA2 Decoder -2010-12-15 : Igor Pavlov : Public domain */ - -/* #define SHOW_DEBUG_INFO */ - -#ifdef SHOW_DEBUG_INFO -#include -#endif - -#include - -#include "Lzma2Dec.h" - -/* -00000000 - EOS -00000001 U U - Uncompressed Reset Dic -00000010 U U - Uncompressed No Reset -100uuuuu U U P P - LZMA no reset -101uuuuu U U P P - LZMA reset state -110uuuuu U U P P S - LZMA reset state + new prop -111uuuuu U U P P S - LZMA reset state + new prop + reset dic - - u, U - Unpack Size - P - Pack Size - S - Props -*/ - -#define LZMA2_CONTROL_LZMA (1 << 7) -#define LZMA2_CONTROL_COPY_NO_RESET 2 -#define LZMA2_CONTROL_COPY_RESET_DIC 1 -#define LZMA2_CONTROL_EOF 0 - -#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0) - -#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3) -#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2) - -#define LZMA2_LCLP_MAX 4 -#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p)&1)) << ((p) / 2 + 11)) - -#ifdef SHOW_DEBUG_INFO -#define PRF(x) x -#else -#define PRF(x) -#endif - -typedef enum { - LZMA2_STATE_CONTROL, - LZMA2_STATE_UNPACK0, - LZMA2_STATE_UNPACK1, - LZMA2_STATE_PACK0, - LZMA2_STATE_PACK1, - LZMA2_STATE_PROP, - LZMA2_STATE_DATA, - LZMA2_STATE_DATA_CONT, - LZMA2_STATE_FINISHED, - LZMA2_STATE_ERROR -} ELzma2State; - -static SRes Lzma2Dec_GetOldProps(Byte prop, Byte* props) -{ - UInt32 dicSize; - if (prop > 40) - return SZ_ERROR_UNSUPPORTED; - dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop); - props[0] = (Byte)LZMA2_LCLP_MAX; - props[1] = (Byte)(dicSize); - props[2] = (Byte)(dicSize >> 8); - props[3] = (Byte)(dicSize >> 16); - props[4] = (Byte)(dicSize >> 24); - return SZ_OK; -} - -SRes Lzma2Dec_AllocateProbs(CLzma2Dec* p, Byte prop, ISzAlloc* alloc) -{ - Byte props[LZMA_PROPS_SIZE]; - RINOK(Lzma2Dec_GetOldProps(prop, props)); - return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); -} - -SRes Lzma2Dec_Allocate(CLzma2Dec* p, Byte prop, ISzAlloc* alloc) -{ - Byte props[LZMA_PROPS_SIZE]; - RINOK(Lzma2Dec_GetOldProps(prop, props)); - return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); -} - -void Lzma2Dec_Init(CLzma2Dec* p) -{ - p->state = LZMA2_STATE_CONTROL; - p->needInitDic = True; - p->needInitState = True; - p->needInitProp = True; - LzmaDec_Init(&p->decoder); -} - -static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec* p, Byte b) -{ - switch (p->state) { - case LZMA2_STATE_CONTROL: - p->control = b; - PRF(printf("\n %4X ", p->decoder.dicPos)); - PRF(printf(" %2X", b)); - if (p->control == 0) - return LZMA2_STATE_FINISHED; - if (LZMA2_IS_UNCOMPRESSED_STATE(p)) { - if ((p->control & 0x7F) > 2) - return LZMA2_STATE_ERROR; - p->unpackSize = 0; - } else - p->unpackSize = (UInt32)(p->control & 0x1F) << 16; - return LZMA2_STATE_UNPACK0; - - case LZMA2_STATE_UNPACK0: - p->unpackSize |= (UInt32)b << 8; - return LZMA2_STATE_UNPACK1; - - case LZMA2_STATE_UNPACK1: - p->unpackSize |= (UInt32)b; - p->unpackSize++; - PRF(printf(" %8d", p->unpackSize)); - return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0; - - case LZMA2_STATE_PACK0: - p->packSize = (UInt32)b << 8; - return LZMA2_STATE_PACK1; - - case LZMA2_STATE_PACK1: - p->packSize |= (UInt32)b; - p->packSize++; - PRF(printf(" %8d", p->packSize)); - return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP : (p->needInitProp ? 
LZMA2_STATE_ERROR : LZMA2_STATE_DATA); - - case LZMA2_STATE_PROP: { - int lc, lp; - if (b >= (9 * 5 * 5)) - return LZMA2_STATE_ERROR; - lc = b % 9; - b /= 9; - p->decoder.prop.pb = b / 5; - lp = b % 5; - if (lc + lp > LZMA2_LCLP_MAX) - return LZMA2_STATE_ERROR; - p->decoder.prop.lc = lc; - p->decoder.prop.lp = lp; - p->needInitProp = False; - return LZMA2_STATE_DATA; - } - } - return LZMA2_STATE_ERROR; -} - -static void LzmaDec_UpdateWithUncompressed(CLzmaDec* p, const Byte* src, SizeT size) -{ - memcpy(p->dic + p->dicPos, src, size); - p->dicPos += size; - if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size) - p->checkDicSize = p->prop.dicSize; - p->processedPos += (UInt32)size; -} - -void LzmaDec_InitDicAndState(CLzmaDec* p, Bool initDic, Bool initState); - -SRes Lzma2Dec_DecodeToDic(CLzma2Dec* p, SizeT dicLimit, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status) -{ - SizeT inSize = *srcLen; - *srcLen = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; - - while (p->state != LZMA2_STATE_FINISHED) { - SizeT dicPos = p->decoder.dicPos; - if (p->state == LZMA2_STATE_ERROR) - return SZ_ERROR_DATA; - if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_OK; - } - if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) { - if (*srcLen == inSize) { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - (*srcLen)++; - p->state = Lzma2Dec_UpdateState(p, *src++); - continue; - } - { - SizeT destSizeCur = dicLimit - dicPos; - SizeT srcSizeCur = inSize - *srcLen; - ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY; - - if (p->unpackSize <= destSizeCur) { - destSizeCur = (SizeT)p->unpackSize; - curFinishMode = LZMA_FINISH_END; - } - - if (LZMA2_IS_UNCOMPRESSED_STATE(p)) { - if (*srcLen == inSize) { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - - if (p->state == LZMA2_STATE_DATA) { - Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC); - if (initDic) - p->needInitProp = p->needInitState = True; - else if (p->needInitDic) - return SZ_ERROR_DATA; - p->needInitDic = False; - LzmaDec_InitDicAndState(&p->decoder, initDic, False); - } - - if (srcSizeCur > destSizeCur) - srcSizeCur = destSizeCur; - - if (srcSizeCur == 0) - return SZ_ERROR_DATA; - - LzmaDec_UpdateWithUncompressed(&p->decoder, src, srcSizeCur); - - src += srcSizeCur; - *srcLen += srcSizeCur; - p->unpackSize -= (UInt32)srcSizeCur; - p->state = (p->unpackSize == 0) ? 
LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; - } else { - SizeT outSizeProcessed; - SRes res; - - if (p->state == LZMA2_STATE_DATA) { - int mode = LZMA2_GET_LZMA_MODE(p); - Bool initDic = (mode == 3); - Bool initState = (mode > 0); - if ((!initDic && p->needInitDic) || (!initState && p->needInitState)) - return SZ_ERROR_DATA; - - LzmaDec_InitDicAndState(&p->decoder, initDic, initState); - p->needInitDic = False; - p->needInitState = False; - p->state = LZMA2_STATE_DATA_CONT; - } - if (srcSizeCur > p->packSize) - srcSizeCur = (SizeT)p->packSize; - - res = LzmaDec_DecodeToDic(&p->decoder, dicPos + destSizeCur, src, &srcSizeCur, curFinishMode, status); - - src += srcSizeCur; - *srcLen += srcSizeCur; - p->packSize -= (UInt32)srcSizeCur; - - outSizeProcessed = p->decoder.dicPos - dicPos; - p->unpackSize -= (UInt32)outSizeProcessed; - - RINOK(res); - if (*status == LZMA_STATUS_NEEDS_MORE_INPUT) - return res; - - if (srcSizeCur == 0 && outSizeProcessed == 0) { - if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK || p->unpackSize != 0 || p->packSize != 0) - return SZ_ERROR_DATA; - p->state = LZMA2_STATE_CONTROL; - } - if (*status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK) - *status = LZMA_STATUS_NOT_FINISHED; - } - } - } - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return SZ_OK; -} - -SRes Lzma2Dec_DecodeToBuf(CLzma2Dec* p, Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status) -{ - SizeT outSize = *destLen, inSize = *srcLen; - *srcLen = *destLen = 0; - for (;;) { - SizeT srcSizeCur = inSize, outSizeCur, dicPos; - ELzmaFinishMode curFinishMode; - SRes res; - if (p->decoder.dicPos == p->decoder.dicBufSize) - p->decoder.dicPos = 0; - dicPos = p->decoder.dicPos; - if (outSize > p->decoder.dicBufSize - dicPos) { - outSizeCur = p->decoder.dicBufSize; - curFinishMode = LZMA_FINISH_ANY; - } else { - outSizeCur = dicPos + outSize; - curFinishMode = finishMode; - } - - res = Lzma2Dec_DecodeToDic(p, outSizeCur, src, &srcSizeCur, curFinishMode, status); - src += srcSizeCur; - inSize -= srcSizeCur; - *srcLen += srcSizeCur; - outSizeCur = p->decoder.dicPos - dicPos; - memcpy(dest, p->decoder.dic + dicPos, outSizeCur); - dest += outSizeCur; - outSize -= outSizeCur; - *destLen += outSizeCur; - if (res != 0) - return res; - if (outSizeCur == 0 || outSize == 0) - return SZ_OK; - } -} - -SRes Lzma2Decode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - Byte prop, ELzmaFinishMode finishMode, ELzmaStatus* status, ISzAlloc* alloc) -{ - CLzma2Dec p; - SRes res; - SizeT outSize = *destLen, inSize = *srcLen; - *destLen = *srcLen = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; - Lzma2Dec_Construct(&p); - RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)); - p.decoder.dic = dest; - p.decoder.dicBufSize = outSize; - Lzma2Dec_Init(&p); - *srcLen = inSize; - res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); - *destLen = p.decoder.dicPos; - if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) - res = SZ_ERROR_INPUT_EOF; - Lzma2Dec_FreeProbs(&p, alloc); - return res; -} +/* Lzma2Dec.c -- LZMA2 Decoder +2024-03-01 : Igor Pavlov : Public domain */ + +/* #define SHOW_DEBUG_INFO */ + +#include "Precomp.h" + +#ifdef SHOW_DEBUG_INFO +#include +#endif + +#include + +#include "Lzma2Dec.h" + +/* +00000000 - End of data +00000001 U U - Uncompressed, reset dic, need reset state and set new prop +00000010 U U - Uncompressed, no reset +100uuuuu U U P P - LZMA, no reset +101uuuuu U U P P - LZMA, reset state +110uuuuu U U P P S - LZMA, reset state 
+ set new prop +111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic + + u, U - Unpack Size + P - Pack Size + S - Props +*/ + +#define LZMA2_CONTROL_COPY_RESET_DIC 1 + +#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & (1 << 7)) == 0) + +#define LZMA2_LCLP_MAX 4 +#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) + +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif + +typedef enum +{ + LZMA2_STATE_CONTROL, + LZMA2_STATE_UNPACK0, + LZMA2_STATE_UNPACK1, + LZMA2_STATE_PACK0, + LZMA2_STATE_PACK1, + LZMA2_STATE_PROP, + LZMA2_STATE_DATA, + LZMA2_STATE_DATA_CONT, + LZMA2_STATE_FINISHED, + LZMA2_STATE_ERROR +} ELzma2State; + +static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props) +{ + UInt32 dicSize; + if (prop > 40) + return SZ_ERROR_UNSUPPORTED; + dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop); + props[0] = (Byte)LZMA2_LCLP_MAX; + props[1] = (Byte)(dicSize); + props[2] = (Byte)(dicSize >> 8); + props[3] = (Byte)(dicSize >> 16); + props[4] = (Byte)(dicSize >> 24); + return SZ_OK; +} + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)) + return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)) + return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +void Lzma2Dec_Init(CLzma2Dec *p) +{ + p->state = LZMA2_STATE_CONTROL; + p->needInitLevel = 0xE0; + p->isExtraMode = False; + p->unpackSize = 0; + + // p->decoder.dicPos = 0; // we can use it instead of full init + LzmaDec_Init(&p->decoder); +} + +// ELzma2State +static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) +{ + switch (p->state) + { + case LZMA2_STATE_CONTROL: + p->isExtraMode = False; + p->control = b; + PRF(printf("\n %8X", (unsigned)p->decoder.dicPos)); + PRF(printf(" %02X", (unsigned)b)); + if (b == 0) + return LZMA2_STATE_FINISHED; + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (b == LZMA2_CONTROL_COPY_RESET_DIC) + p->needInitLevel = 0xC0; + else if (b > 2 || p->needInitLevel == 0xE0) + return LZMA2_STATE_ERROR; + } + else + { + if (b < p->needInitLevel) + return LZMA2_STATE_ERROR; + p->needInitLevel = 0; + p->unpackSize = (UInt32)(b & 0x1F) << 16; + } + return LZMA2_STATE_UNPACK0; + + case LZMA2_STATE_UNPACK0: + p->unpackSize |= (UInt32)b << 8; + return LZMA2_STATE_UNPACK1; + + case LZMA2_STATE_UNPACK1: + p->unpackSize |= (UInt32)b; + p->unpackSize++; + PRF(printf(" %7u", (unsigned)p->unpackSize)); + return LZMA2_IS_UNCOMPRESSED_STATE(p) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0; + + case LZMA2_STATE_PACK0: + p->packSize = (UInt32)b << 8; + return LZMA2_STATE_PACK1; + + case LZMA2_STATE_PACK1: + p->packSize |= (UInt32)b; + p->packSize++; + // if (p->packSize < 5) return LZMA2_STATE_ERROR; + PRF(printf(" %5u", (unsigned)p->packSize)); + return (p->control & 0x40) ? 
LZMA2_STATE_PROP : LZMA2_STATE_DATA; + + case LZMA2_STATE_PROP: + { + unsigned lc, lp; + if (b >= (9 * 5 * 5)) + return LZMA2_STATE_ERROR; + lc = b % 9; + b /= 9; + p->decoder.prop.pb = (Byte)(b / 5); + lp = b % 5; + if (lc + lp > LZMA2_LCLP_MAX) + return LZMA2_STATE_ERROR; + p->decoder.prop.lc = (Byte)lc; + p->decoder.prop.lp = (Byte)lp; + return LZMA2_STATE_DATA; + } + + default: + return LZMA2_STATE_ERROR; + } +} + +static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size) +{ + memcpy(p->dic + p->dicPos, src, size); + p->dicPos += size; + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size) + p->checkDicSize = p->prop.dicSize; + p->processedPos += (UInt32)size; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); + + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + + while (p->state != LZMA2_STATE_ERROR) + { + SizeT dicPos; + + if (p->state == LZMA2_STATE_FINISHED) + { + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + dicPos = p->decoder.dicPos; + + if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + (*srcLen)++; + p->state = Lzma2Dec_UpdateState(p, *src++); + if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED) + break; + continue; + } + + { + SizeT inCur = inSize - *srcLen; + SizeT outCur = dicLimit - dicPos; + ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY; + + if (outCur >= p->unpackSize) + { + outCur = (SizeT)p->unpackSize; + curFinishMode = LZMA_FINISH_END; + } + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (inCur == 0) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + if (p->state == LZMA2_STATE_DATA) + { + BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC); + LzmaDec_InitDicAndState(&p->decoder, initDic, False); + } + + if (inCur > outCur) + inCur = outCur; + if (inCur == 0) + break; + + LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur); + + src += inCur; + *srcLen += inCur; + p->unpackSize -= (UInt32)inCur; + p->state = (p->unpackSize == 0) ? 
LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + SRes res; + + if (p->state == LZMA2_STATE_DATA) + { + BoolInt initDic = (p->control >= 0xE0); + BoolInt initState = (p->control >= 0xA0); + LzmaDec_InitDicAndState(&p->decoder, initDic, initState); + p->state = LZMA2_STATE_DATA_CONT; + } + + if (inCur > p->packSize) + inCur = (SizeT)p->packSize; + + res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status); + + src += inCur; + *srcLen += inCur; + p->packSize -= (UInt32)inCur; + outCur = p->decoder.dicPos - dicPos; + p->unpackSize -= (UInt32)outCur; + + if (res != 0) + break; + + if (*status == LZMA_STATUS_NEEDS_MORE_INPUT) + { + if (p->packSize == 0) + break; + return SZ_OK; + } + + if (inCur == 0 && outCur == 0) + { + if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + || p->unpackSize != 0 + || p->packSize != 0) + break; + p->state = LZMA2_STATE_CONTROL; + } + + *status = LZMA_STATUS_NOT_SPECIFIED; + } + } + } + + *status = LZMA_STATUS_NOT_SPECIFIED; + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; +} + + + + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, + const Byte *src, SizeT *srcLen, + int checkFinishBlock) +{ + SizeT inSize = *srcLen; + *srcLen = 0; + + while (p->state != LZMA2_STATE_ERROR) + { + if (p->state == LZMA2_STATE_FINISHED) + return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK; + + if (outSize == 0 && !checkFinishBlock) + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + (*srcLen)++; + + p->state = Lzma2Dec_UpdateState(p, *src++); + + if (p->state == LZMA2_STATE_UNPACK0) + { + // if (p->decoder.dicPos != 0) + if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0) + return LZMA2_PARSE_STATUS_NEW_BLOCK; + // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED; + } + + // The following code can be commented. + // It's not big problem, if we read additional input bytes. + // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state. + + if (outSize == 0 && p->state != LZMA2_STATE_FINISHED) + { + // checkFinishBlock is true. So we expect that block must be finished, + // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here + // break; + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + } + + if (p->state == LZMA2_STATE_DATA) + return LZMA2_PARSE_STATUS_NEW_CHUNK; + + continue; + } + + if (outSize == 0) + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + + { + SizeT inCur = inSize - *srcLen; + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (inCur == 0) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + if (inCur > p->unpackSize) + inCur = p->unpackSize; + if (inCur > outSize) + inCur = outSize; + p->decoder.dicPos += inCur; + src += inCur; + *srcLen += inCur; + outSize -= inCur; + p->unpackSize -= (UInt32)inCur; + p->state = (p->unpackSize == 0) ? 
LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + p->isExtraMode = True; + + if (inCur == 0) + { + if (p->packSize != 0) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + } + else if (p->state == LZMA2_STATE_DATA) + { + p->state = LZMA2_STATE_DATA_CONT; + if (*src != 0) + { + // first byte of lzma chunk must be Zero + *srcLen += 1; + p->packSize--; + break; + } + } + + if (inCur > p->packSize) + inCur = (SizeT)p->packSize; + + src += inCur; + *srcLen += inCur; + p->packSize -= (UInt32)inCur; + + if (p->packSize == 0) + { + SizeT rem = outSize; + if (rem > p->unpackSize) + rem = p->unpackSize; + p->decoder.dicPos += rem; + p->unpackSize -= (UInt32)rem; + outSize -= rem; + if (p->unpackSize == 0) + p->state = LZMA2_STATE_CONTROL; + } + } + } + } + + p->state = LZMA2_STATE_ERROR; + return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED; +} + + + + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen, inSize = *srcLen; + *srcLen = *destLen = 0; + + for (;;) + { + SizeT inCur = inSize, outCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + + if (p->decoder.dicPos == p->decoder.dicBufSize) + p->decoder.dicPos = 0; + dicPos = p->decoder.dicPos; + curFinishMode = LZMA_FINISH_ANY; + outCur = p->decoder.dicBufSize - dicPos; + + if (outCur >= outSize) + { + outCur = outSize; + curFinishMode = finishMode; + } + + res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status); + + src += inCur; + inSize -= inCur; + *srcLen += inCur; + outCur = p->decoder.dicPos - dicPos; + memcpy(dest, p->decoder.dic + dicPos, outCur); + dest += outCur; + outSize -= outCur; + *destLen += outCur; + if (res != 0) + return res; + if (outCur == 0 || outSize == 0) + return SZ_OK; + } +} + + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzma2Dec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + Lzma2Dec_CONSTRUCT(&p) + RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)) + p.decoder.dic = dest; + p.decoder.dicBufSize = outSize; + Lzma2Dec_Init(&p); + *srcLen = inSize; + res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.decoder.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + Lzma2Dec_FreeProbs(&p, alloc); + return res; +} + +#undef PRF diff --git a/src/core/fex/7z_C/Lzma2Dec.h b/src/core/fex/7z_C/Lzma2Dec.h index 45b6767c..9b15ab04 100644 --- a/src/core/fex/7z_C/Lzma2Dec.h +++ b/src/core/fex/7z_C/Lzma2Dec.h @@ -1,82 +1,121 @@ -/* Lzma2Dec.h -- LZMA2 Decoder -2009-05-03 : Igor Pavlov : Public domain */ - -#ifndef __LZMA2_DEC_H -#define __LZMA2_DEC_H - -#include "LzmaDec.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---------- State Interface ---------- */ - -typedef struct -{ - CLzmaDec decoder; - UInt32 packSize; - UInt32 unpackSize; - int state; - Byte control; - Bool needInitDic; - Bool needInitState; - Bool needInitProp; -} CLzma2Dec; - -#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder) -#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc); -#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc); - -SRes Lzma2Dec_AllocateProbs(CLzma2Dec* p, Byte prop, ISzAlloc* alloc); -SRes Lzma2Dec_Allocate(CLzma2Dec* p, Byte prop, 
ISzAlloc* alloc); -void Lzma2Dec_Init(CLzma2Dec* p); - -/* -finishMode: - It has meaning only if the decoding reaches output limit (*destLen or dicLimit). - LZMA_FINISH_ANY - use smallest number of input bytes - LZMA_FINISH_END - read EndOfStream marker after decoding - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_NEEDS_MORE_INPUT - SZ_ERROR_DATA - Data error -*/ - -SRes Lzma2Dec_DecodeToDic(CLzma2Dec* p, SizeT dicLimit, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); - -SRes Lzma2Dec_DecodeToBuf(CLzma2Dec* p, Byte* dest, SizeT* destLen, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); - -/* ---------- One Call Interface ---------- */ - -/* -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - use smallest number of input bytes - LZMA_FINISH_END - read EndOfStream marker after decoding - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - SZ_ERROR_DATA - Data error - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties - SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). -*/ - -SRes Lzma2Decode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - Byte prop, ELzmaFinishMode finishMode, ELzmaStatus* status, ISzAlloc* alloc); - -#ifdef __cplusplus -} -#endif - -#endif +/* Lzma2Dec.h -- LZMA2 Decoder +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA2_DEC_H +#define ZIP7_INC_LZMA2_DEC_H + +#include "LzmaDec.h" + +EXTERN_C_BEGIN + +/* ---------- State Interface ---------- */ + +typedef struct +{ + unsigned state; + Byte control; + Byte needInitLevel; + Byte isExtraMode; + Byte _pad_; + UInt32 packSize; + UInt32 unpackSize; + CLzmaDec decoder; +} CLzma2Dec; + +#define Lzma2Dec_CONSTRUCT(p) LzmaDec_CONSTRUCT(&(p)->decoder) +#define Lzma2Dec_Construct(p) Lzma2Dec_CONSTRUCT(p) +#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) +#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); +void Lzma2Dec_Init(CLzma2Dec *p); + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen or dicLimit). + LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + SZ_ERROR_DATA - Data error +*/ + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- LZMA2 block and chunk parsing ---------- */ + +/* +Lzma2Dec_Parse() parses compressed data stream up to next independent block or next chunk data. +It can return LZMA_STATUS_* code or LZMA2_PARSE_STATUS_* code: + - LZMA2_PARSE_STATUS_NEW_BLOCK - there is new block, and 1 additional byte (control byte of next block header) was read from input. + - LZMA2_PARSE_STATUS_NEW_CHUNK - there is new chunk, and only lzma2 header of new chunk was read. 
+ CLzma2Dec::unpackSize contains unpack size of that chunk +*/ + +typedef enum +{ +/* + LZMA_STATUS_NOT_SPECIFIED // data error + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED // + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK // unused +*/ + LZMA2_PARSE_STATUS_NEW_BLOCK = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + 1, + LZMA2_PARSE_STATUS_NEW_CHUNK +} ELzma2ParseStatus; + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, // output size + const Byte *src, SizeT *srcLen, + int checkFinishBlock // set (checkFinishBlock = 1), if it must read full input data, if decoder.dicPos reaches blockMax position. + ); + +/* +LZMA2 parser doesn't decode LZMA chunks, so we must read + full input LZMA chunk to decode some part of LZMA chunk. + +Lzma2Dec_GetUnpackExtra() returns the value that shows + max possible number of output bytes that can be output by decoder + at current input positon. +*/ + +#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0) + + +/* ---------- One Call Interface ---------- */ + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). +*/ + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/LzmaDec.c b/src/core/fex/7z_C/LzmaDec.c index 7914da28..b6bcd509 100644 --- a/src/core/fex/7z_C/LzmaDec.c +++ b/src/core/fex/7z_C/LzmaDec.c @@ -1,980 +1,1363 @@ -/* LzmaDec.c -- LZMA Decoder -2010-12-15 : Igor Pavlov : Public domain */ - -#include "LzmaDec.h" - -#include - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_INIT_SIZE 5 - -#define NORMALIZE \ - if (range < kTopValue) { \ - range <<= 8; \ - code = (code << 8) | (*buf++); \ - } - -#define IF_BIT_0(p) \ - ttt = *(p); \ - NORMALIZE; \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0(p) \ - range = bound; \ - *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); -#define UPDATE_1(p) \ - range -= bound; \ - code -= bound; \ - *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); -#define GET_BIT2(p, i, A0, A1) \ - IF_BIT_0(p) \ - { \ - UPDATE_0(p); \ - i = (i + i); \ - A0; \ - } \ - else \ - { \ - UPDATE_1(p); \ - i = (i + i) + 1; \ - A1; \ - } -#define GET_BIT(p, i) GET_BIT2(p, i, ;, ;) - -#define TREE_GET_BIT(probs, i) \ - { \ - GET_BIT((probs + i), i); \ - } -#define TREE_DECODE(probs, limit, i) \ - { \ - i = 1; \ - do { \ - TREE_GET_BIT(probs, i); \ - } while (i < limit); \ - i -= limit; \ - } - -/* #define _LZMA_SIZE_OPT */ - -#ifdef _LZMA_SIZE_OPT -#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) -#else -#define TREE_6_DECODE(probs, i) \ - { \ - i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - i -= 0x40; \ - } -#endif - -#define NORMALIZE_CHECK \ - 
if (range < kTopValue) { \ - if (buf >= bufLimit) \ - return DUMMY_ERROR; \ - range <<= 8; \ - code = (code << 8) | (*buf++); \ - } - -#define IF_BIT_0_CHECK(p) \ - ttt = *(p); \ - NORMALIZE_CHECK; \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0_CHECK range = bound; -#define UPDATE_1_CHECK \ - range -= bound; \ - code -= bound; -#define GET_BIT2_CHECK(p, i, A0, A1) \ - IF_BIT_0_CHECK(p) \ - { \ - UPDATE_0_CHECK; \ - i = (i + i); \ - A0; \ - } \ - else \ - { \ - UPDATE_1_CHECK; \ - i = (i + i) + 1; \ - A1; \ - } -#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ;, ;) -#define TREE_DECODE_CHECK(probs, limit, i) \ - { \ - i = 1; \ - do { \ - GET_BIT_CHECK(probs + i, i) \ - } while (i < limit); \ - i -= limit; \ - } - -#define kNumPosBitsMax 4 -#define kNumPosStatesMax (1 << kNumPosBitsMax) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define LenChoice 0 -#define LenChoice2 (LenChoice + 1) -#define LenLow (LenChoice2 + 1) -#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) -#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) -#define kNumLenProbs (LenHigh + kLenNumHighSymbols) - -#define kNumStates 12 -#define kNumLitStates 7 - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -#define kNumPosSlotBits 6 -#define kNumLenToPosStates 4 - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) - -#define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) - -#define IsMatch 0 -#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) -#define IsRepG0 (IsRep + kNumStates) -#define IsRepG1 (IsRepG0 + kNumStates) -#define IsRepG2 (IsRepG1 + kNumStates) -#define IsRep0Long (IsRepG2 + kNumStates) -#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) -#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) -#define LenCoder (Align + kAlignTableSize) -#define RepLenCoder (LenCoder + kNumLenProbs) -#define Literal (RepLenCoder + kNumLenProbs) - -#define LZMA_BASE_SIZE 1846 -#define LZMA_LIT_SIZE 768 - -#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) - -#if Literal != LZMA_BASE_SIZE -StopCompilingDueBUG -#endif - -#define LZMA_DIC_MIN (1 << 12) - - /* First LZMA-symbol is always decoded. 
-And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization -Out: - Result: - SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : Flush marker - = kMatchSpecLenStart + 2 : State Init Marker -*/ - - static int MY_FAST_CALL - LzmaDec_DecodeReal(CLzmaDec* p, SizeT limit, const Byte* bufLimit) -{ - CLzmaProb* probs = p->probs; - - unsigned state = p->state; - UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; - unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; - unsigned lc = p->prop.lc; - - Byte* dic = p->dic; - SizeT dicBufSize = p->dicBufSize; - SizeT dicPos = p->dicPos; - - UInt32 processedPos = p->processedPos; - UInt32 checkDicSize = p->checkDicSize; - unsigned len = 0; - - const Byte* buf = p->buf; - UInt32 range = p->range; - UInt32 code = p->code; - - do { - CLzmaProb* prob; - UInt32 bound; - unsigned ttt; - unsigned posState = processedPos & pbMask; - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) - { - unsigned symbol; - UPDATE_0(prob); - prob = probs + Literal; - if (checkDicSize != 0 || processedPos != 0) - prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); - - if (state < kNumLitStates) { - state -= (state < 4) ? state : 3; - symbol = 1; - do { - GET_BIT(prob + symbol, symbol) - } while (symbol < 0x100); - } else { - unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - unsigned offs = 0x100; - state -= (state < 10) ? 3 : 6; - symbol = 1; - do { - unsigned bit; - CLzmaProb* probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) - } while (symbol < 0x100); - } - dic[dicPos++] = (Byte)symbol; - processedPos++; - continue; - } - else - { - UPDATE_1(prob); - prob = probs + IsRep + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - state += kNumStates; - prob = probs + LenCoder; - } - else - { - UPDATE_1(prob); - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - prob = probs + IsRepG0 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) - { - UPDATE_0(prob); - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - dicPos++; - processedPos++; - state = state < kNumLitStates ? 9 : 11; - continue; - } - UPDATE_1(prob); - } - else - { - UInt32 distance; - UPDATE_1(prob); - prob = probs + IsRepG1 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep1; - } - else - { - UPDATE_1(prob); - prob = probs + IsRepG2 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep2; - } - else - { - UPDATE_1(prob); - distance = rep3; - rep3 = rep2; - } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - state = state < kNumLitStates ? 
8 : 11; - prob = probs + RepLenCoder; - } - { - unsigned _limit, offset; - CLzmaProb* probLen = prob + LenChoice; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - _limit = (1 << kLenNumLowBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - _limit = (1 << kLenNumMidBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - _limit = (1 << kLenNumHighBits); - } - } - TREE_DECODE(probLen, _limit, len); - len += offset; - } - - if (state >= kNumStates) { - UInt32 distance; - prob = probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); - if (distance >= kStartPosModelIndex) { - unsigned posSlot = (unsigned)distance; - int numDirectBits = (int)(((distance >> 1) - 1)); - distance = (2 | (distance & 1)); - if (posSlot < kEndPosModelIndex) { - distance <<= numDirectBits; - prob = probs + SpecPos + distance - posSlot - 1; - { - UInt32 mask = 1; - unsigned i = 1; - do { - GET_BIT2(prob + i, i, ;, distance |= mask); - mask <<= 1; - } while (--numDirectBits != 0); - } - } else { - numDirectBits -= kNumAlignBits; - do { - NORMALIZE - range >>= 1; - - { - UInt32 t; - code -= range; - t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ - distance = (distance << 1) + (t + 1); - code += range & t; - } - /* - distance <<= 1; - if (code >= range) - { - code -= range; - distance |= 1; - } - */ - } while (--numDirectBits != 0); - prob = probs + Align; - distance <<= kNumAlignBits; - { - unsigned i = 1; - GET_BIT2(prob + i, i, ;, distance |= 1); - GET_BIT2(prob + i, i, ;, distance |= 2); - GET_BIT2(prob + i, i, ;, distance |= 4); - GET_BIT2(prob + i, i, ;, distance |= 8); - } - if (distance == (UInt32)0xFFFFFFFF) { - len += kMatchSpecLenStart; - state -= kNumStates; - break; - } - } - } - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance + 1; - if (checkDicSize == 0) { - if (distance >= processedPos) - return SZ_ERROR_DATA; - } else if (distance >= checkDicSize) - return SZ_ERROR_DATA; - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; - } - - len += kMatchMinLen; - - if (limit == dicPos) - return SZ_ERROR_DATA; - { - SizeT rem = limit - dicPos; - unsigned curLen = ((rem < len) ? (unsigned)rem : len); - SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? 
dicBufSize : 0); - - processedPos += curLen; - - len -= curLen; - if (pos + curLen <= dicBufSize) { - Byte* dest = dic + dicPos; - ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; - const Byte* lim = dest + curLen; - dicPos += curLen; - do - *(dest) = (Byte) * (dest + src); - while (++dest != lim); - } else { - do { - dic[dicPos++] = dic[pos]; - if (++pos == dicBufSize) - pos = 0; - } while (--curLen != 0); - } - } - } - } while (dicPos < limit && buf < bufLimit); - NORMALIZE; - p->buf = buf; - p->range = range; - p->code = code; - p->remainLen = len; - p->dicPos = dicPos; - p->processedPos = processedPos; - p->reps[0] = rep0; - p->reps[1] = rep1; - p->reps[2] = rep2; - p->reps[3] = rep3; - p->state = state; - - return SZ_OK; -} - -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec* p, SizeT limit) -{ - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) { - Byte* dic = p->dic; - SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = p->remainLen; - UInt32 rep0 = p->reps[0]; - if (limit - dicPos < len) - len = (unsigned)(limit - dicPos); - - if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) - p->checkDicSize = p->prop.dicSize; - - p->processedPos += len; - p->remainLen -= len; - while (len != 0) { - len--; - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - dicPos++; - } - p->dicPos = dicPos; - } -} - -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec* p, SizeT limit, const Byte* bufLimit) -{ - do { - SizeT limit2 = limit; - if (p->checkDicSize == 0) { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - } - RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); - if (p->processedPos >= p->prop.dicSize) - p->checkDicSize = p->prop.dicSize; - LzmaDec_WriteRem(p, limit); - } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - if (p->remainLen > kMatchSpecLenStart) { - p->remainLen = kMatchSpecLenStart; - } - return 0; -} - -typedef enum { - DUMMY_ERROR, /* unexpected end of input stream */ - DUMMY_LIT, - DUMMY_MATCH, - DUMMY_REP -} ELzmaDummy; - -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec* p, const Byte* buf, SizeT inSize) -{ - UInt32 range = p->range; - UInt32 code = p->code; - const Byte* bufLimit = buf + inSize; - CLzmaProb* probs = p->probs; - unsigned state = p->state; - ELzmaDummy res; - - { - CLzmaProb* prob; - UInt32 bound; - unsigned ttt; - unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK - - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - - prob = probs + Literal; - if (p->checkDicSize != 0 || p->processedPos != 0) - prob += (LZMA_LIT_SIZE * ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); - - if (state < kNumLitStates) { - unsigned symbol = 1; - do { - GET_BIT_CHECK(prob + symbol, symbol) - } while (symbol < 0x100); - } else { - unsigned matchByte = p->dic[p->dicPos - p->reps[0] + ((p->dicPos < p->reps[0]) ? 
p->dicBufSize : 0)]; - unsigned offs = 0x100; - unsigned symbol = 1; - do { - unsigned bit; - CLzmaProb* probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) - } while (symbol < 0x100); - } - res = DUMMY_LIT; - } - else - { - unsigned len; - UPDATE_1_CHECK; - - prob = probs + IsRep + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - state = 0; - prob = probs + LenCoder; - res = DUMMY_MATCH; - } - else - { - UPDATE_1_CHECK; - res = DUMMY_REP; - prob = probs + IsRepG0 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; - } - else - { - UPDATE_1_CHECK; - } - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG1 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG2 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - } - } - } - state = kNumStates; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - CLzmaProb* probLen = prob + LenChoice; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - limit = 1 << kLenNumLowBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenChoice2; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - limit = 1 << kLenNumMidBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit = 1 << kLenNumHighBits; - } - } - TREE_DECODE_CHECK(probLen, limit, len); - len += offset; - } - - if (state < 4) { - unsigned posSlot; - prob = probs + PosSlot + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); - if (posSlot >= kStartPosModelIndex) { - int numDirectBits = ((posSlot >> 1) - 1); - - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - - if (posSlot < kEndPosModelIndex) { - prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; - } else { - numDirectBits -= kNumAlignBits; - do { - NORMALIZE_CHECK - range >>= 1; - code -= range & (((code - range) >> 31) - 1); - /* if (code >= range) code -= range; */ - } while (--numDirectBits != 0); - prob = probs + Align; - numDirectBits = kNumAlignBits; - } - { - unsigned i = 1; - do { - GET_BIT_CHECK(prob + i, i); - } while (--numDirectBits != 0); - } - } - } - } - } - NORMALIZE_CHECK; - return res; -} - -static void LzmaDec_InitRc(CLzmaDec* p, const Byte* data) -{ - p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]); - p->range = 0xFFFFFFFF; - p->needFlush = 0; -} - -void LzmaDec_InitDicAndState(CLzmaDec* p, Bool initDic, Bool initState) -{ - p->needFlush = 1; - p->remainLen = 0; - p->tempBufSize = 0; - - if (initDic) { - p->processedPos = 0; - p->checkDicSize = 0; - p->needInitState = 1; - } - if (initState) - p->needInitState = 1; -} - -void LzmaDec_Init(CLzmaDec* p) -{ - p->dicPos = 0; - LzmaDec_InitDicAndState(p, True, True); -} - -static void LzmaDec_InitStateReal(CLzmaDec* p) -{ - UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); - UInt32 i; - CLzmaProb* probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - p->needInitState = 0; -} - -SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, const Byte* src, SizeT* srcLen, - ELzmaFinishMode finishMode, ELzmaStatus* status) -{ - SizeT inSize = *srcLen; - (*srcLen) = 0; - LzmaDec_WriteRem(p, dicLimit); - - *status = LZMA_STATUS_NOT_SPECIFIED; - - while (p->remainLen != kMatchSpecLenStart) { - int checkEndMarkNow; - - if (p->needFlush != 0) { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize < RC_INIT_SIZE) { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (p->tempBuf[0] != 0) - return SZ_ERROR_DATA; - - LzmaDec_InitRc(p, p->tempBuf); - p->tempBufSize = 0; - } - - checkEndMarkNow = 0; - if (p->dicPos >= dicLimit) { - if (p->remainLen == 0 && p->code == 0) { - *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; - return SZ_OK; - } - if (finishMode == LZMA_FINISH_ANY) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_OK; - } - if (p->remainLen != 0) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - checkEndMarkNow = 1; - } - - if (p->needInitState) - LzmaDec_InitStateReal(p); - - if (p->tempBufSize == 0) { - SizeT processed; - const Byte* bufLimit; - if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; - (*srcLen) += inSize; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - bufLimit = src; - } else - bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; - p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = 
(SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; - } else { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); - if (dummyRes == DUMMY_ERROR) { - (*srcLen) += lookAhead; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - } - p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; - lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); - (*srcLen) += lookAhead; - src += lookAhead; - inSize -= lookAhead; - p->tempBufSize = 0; - } - } - if (p->code == 0) - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; -} - -SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status) -{ - SizeT outSize = *destLen; - SizeT inSize = *srcLen; - *srcLen = *destLen = 0; - for (;;) { - SizeT inSizeCur = inSize, outSizeCur, dicPos; - ELzmaFinishMode curFinishMode; - SRes res; - if (p->dicPos == p->dicBufSize) - p->dicPos = 0; - dicPos = p->dicPos; - if (outSize > p->dicBufSize - dicPos) { - outSizeCur = p->dicBufSize; - curFinishMode = LZMA_FINISH_ANY; - } else { - outSizeCur = dicPos + outSize; - curFinishMode = finishMode; - } - - res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); - src += inSizeCur; - inSize -= inSizeCur; - *srcLen += inSizeCur; - outSizeCur = p->dicPos - dicPos; - memcpy(dest, p->dic + dicPos, outSizeCur); - dest += outSizeCur; - outSize -= outSizeCur; - *destLen += outSizeCur; - if (res != 0) - return res; - if (outSizeCur == 0 || outSize == 0) - return SZ_OK; - } -} - -void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc) -{ - alloc->Free(alloc, p->probs); - p->probs = 0; -} - -static void LzmaDec_FreeDict(CLzmaDec* p, ISzAlloc* alloc) -{ - alloc->Free(alloc, p->dic); - p->dic = 0; -} - -void LzmaDec_Free(CLzmaDec* p, ISzAlloc* alloc) -{ - LzmaDec_FreeProbs(p, alloc); - LzmaDec_FreeDict(p, alloc); -} - -SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size) -{ - UInt32 dicSize; - Byte d; - - if (size < LZMA_PROPS_SIZE) - return SZ_ERROR_UNSUPPORTED; - else - dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); - - if (dicSize < LZMA_DIC_MIN) - dicSize = LZMA_DIC_MIN; - p->dicSize = dicSize; - - d = data[0]; - if (d >= (9 * 5 * 5)) - return SZ_ERROR_UNSUPPORTED; - - p->lc = d % 9; - d /= 9; - p->pb = d / 5; - p->lp = d % 5; - - return SZ_OK; -} - -static SRes LzmaDec_AllocateProbs2(CLzmaDec* p, const CLzmaProps* propNew, ISzAlloc* alloc) -{ - UInt32 numProbs = LzmaProps_GetNumProbs(propNew); - if (p->probs == 0 || numProbs != p->numProbs) { - LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb*)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); - p->numProbs = numProbs; - if (p->probs == 0) - return SZ_ERROR_MEM; - } - return SZ_OK; -} - -SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) -{ - CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - p->prop = propNew; - return SZ_OK; -} - -SRes 
LzmaDec_Allocate(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) -{ - CLzmaProps propNew; - SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - dicBufSize = propNew.dicSize; - if (p->dic == 0 || dicBufSize != p->dicBufSize) { - LzmaDec_FreeDict(p, alloc); - p->dic = (Byte*)alloc->Alloc(alloc, dicBufSize); - if (p->dic == 0) { - LzmaDec_FreeProbs(p, alloc); - return SZ_ERROR_MEM; - } - } - p->dicBufSize = dicBufSize; - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus* status, ISzAlloc* alloc) -{ - CLzmaDec p; - SRes res; - SizeT outSize = *destLen, inSize = *srcLen; - *destLen = *srcLen = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; - if (inSize < RC_INIT_SIZE) - return SZ_ERROR_INPUT_EOF; - LzmaDec_Construct(&p); - RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); - p.dic = dest; - p.dicBufSize = outSize; - LzmaDec_Init(&p); - *srcLen = inSize; - res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); - *destLen = p.dicPos; - if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) - res = SZ_ERROR_INPUT_EOF; - LzmaDec_FreeProbs(&p, alloc); - return res; -} +/* LzmaDec.c -- LZMA Decoder +2023-04-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #include "CpuArch.h" */ +#include "LzmaDec.h" + +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) + +#define RC_INIT_SIZE 5 + +#ifndef Z7_LZMA_DEC_OPT + +#define kNumMoveBits 5 +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p) i = (i + i); A0; } else \ + { UPDATE_1(p) i = (i + i) + 1; A1; } + +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i) A0; } else \ + { UPDATE_1(p + i) A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define Z7_LZMA_SIZE_OPT */ + +#ifdef Z7_LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + i -= 0x40; } +#endif + +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) +#define MATCHED_LITER_DEC \ + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + +#endif // Z7_LZMA_DEC_OPT + + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = 
(code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK i = (i + i); A0; } else \ + { UPDATE_1_CHECK i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK i += m; m += m; } else \ + { UPDATE_1_CHECK m += m; i += m; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) + +#define kNumStates 12 +#define kNumStates2 16 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) + +/* External ASM code needs same CLzmaProb array layout. So don't change it. 
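+   (The probability-group offsets defined below - SpecPos, IsRep0Long, RepLenCoder,
+   LenCoder, IsMatch, Align, IsRep, IsRepG0, IsRepG1, IsRepG2, PosSlot, Literal -
+   are all indices into that single CLzmaProb array, taken relative to probs_1664;
+   the #error checks below pin this layout at compile time.)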
*/ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) + +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign +#endif + +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS +#endif + + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) + +#define LZMA_DIC_MIN (1 << 12) + +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. +3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. + } + +Processing: + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. 
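+
+ For illustration only, here is a simplified view of the caller logic further
+ below in LzmaDec_DecodeToDic() (the LzmaDec_TryDummy() path is omitted):
+
+   if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+     bufLimit = src;                                      // decode a single symbol
+   else
+     bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;   // keep a worst-case reserve
+
+ which is what guarantees the input reserve described above.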
+ +Out: + RangeCoder is normalized + Result: + SZ_OK - OK + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined +*/ + + +#ifdef Z7_LZMA_DEC_OPT + +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob) + prob = probs + Literal; + if (processedPos != 0 || checkDicSize != 0) + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); + processedPos++; + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + #ifdef Z7_LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif + } + else + { + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 3 : 6; + symbol = 1; + #ifdef Z7_LZMA_SIZE_OPT + do + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + } + while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif + } + + dic[dicPos++] = (Byte)symbol; + continue; + } + + { + UPDATE_1(prob) + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob) + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + UPDATE_0(prob) + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 
9 : 11; + continue; + } + UPDATE_1(prob) + } + else + { + UInt32 distance; + UPDATE_1(prob) + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + distance = rep1; + } + else + { + UPDATE_1(prob) + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + distance = rep2; + } + else + { + UPDATE_1(prob) + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 8 : 11; + prob = probs + RepLenCoder; + } + + #ifdef Z7_LZMA_SIZE_OPT + { + unsigned lim, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen) + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen) + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + lim = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, lim, len) + len += offset; + } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE; + len = 1; + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + len -= 8; + } + else + { + UPDATE_1(probLen) + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + len = 1; + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + } + else + { + UPDATE_1(probLen) + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len) + len += kLenNumLowSymbols * 2; + } + } + } + #endif + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance) + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos; + { + UInt32 m = 1; + distance++; + do + { + REV_BIT_VAR(prob, distance, m) + } + while (--numDirectBits); + distance -= m; + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + REV_BIT_CONST(prob, i, 1) + REV_BIT_CONST(prob, i, 2) + REV_BIT_CONST(prob, i, 4) + REV_BIT_LAST (prob, i, 8) + distance |= i; + } + if (distance == (UInt32)0xFFFFFFFF) + { + len = kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? 
processedPos: checkDicSize)) + { + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; + } + } + + len += kMatchMinLen; + + { + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + { + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; + } + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); + + processedPos += (UInt32)curLen; + + len -= curLen; + if (curLen <= dicBufSize - pos) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; + dicPos += (SizeT)curLen; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + + NORMALIZE + + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = (UInt32)state; + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; + return SZ_OK; +} +#endif + + + +static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; + { + SizeT dicPos = p->dicPos; + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + if (len == 0) + return; + } + } + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += (UInt32)len; + p->remainLen -= (UInt32)len; + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do + { + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + } + while (--len); + p->dicPos = dicPos; + } +} + + +/* +At staring of new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize). 
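+
+ A small worked example (illustrative numbers only): with p->prop.dicSize = 1 << 16
+ and p->processedPos = 1000, at most 65536 - 1000 = 64536 more bytes may be produced
+ before the dictionary window is full, so if the caller's limit was larger it is
+ clamped to p->dicPos + 64536 for this call.
+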
+ +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + +static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; + } + { + int res = LZMA_DECODE_REAL(p, limit, bufLimit); + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + return res; + } +} + + + +typedef enum +{ + DUMMY_INPUT_EOF, /* need more input data */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) +{ + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = *bufOut; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + ELzmaDummy res; + + for (;;) + { + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? 
p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + const CLzmaProb *probLit; + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + break; + } + else + { + UPDATE_1_CHECK + } + } + else + { + UPDATE_1_CHECK + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + } + else + { + UPDATE_1_CHECK + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + } + else + { + UPDATE_1_CHECK + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len) + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot) + if (posSlot >= kStartPosModelIndex) + { + unsigned numDirectBits = ((posSlot >> 1) - 1); + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + unsigned m = 1; + do + { + REV_BIT_CHECK(prob, i, m) + } + while (--numDirectBits); + } + } + } + } + break; + } + NORMALIZE_CHECK + + *bufOut = buf; + return res; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) +{ + p->remainLen = kMatchSpecLenStart + 1; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->remainLen = kMatchSpecLenStart + 2; + } + if (initState) + p->remainLen = kMatchSpecLenStart + 2; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + + +/* +LZMA supports optional end_marker. +So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. 
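+
+ A hedged calling sketch (the names dec, src, srcAvail and dicLimit are
+ illustrative, not defined in this file):
+
+   SizeT used = srcAvail;          // in: bytes available; out: bytes consumed
+   ELzmaStatus st;
+   SRes r = LzmaDec_DecodeToDic(&dec, dicLimit, src, &used, LZMA_FINISH_ANY, &st);
+   // SZ_OK with st == LZMA_STATUS_NEEDS_MORE_INPUT means the remaining bytes were
+   // copied into dec.tempBuf and counted in "used"; supply more input and call again.
+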
+When the decoder reaches dicLimit, it looks (finishMode) parameter: + if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead + if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position + +When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways: + 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA. + 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller + must check (status) value. The caller can show the error, + if the end of stream is expected, and the (status) is noit + LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK. +*/ + + +#define RETURN_NOT_FINISHED_FOR_FINISH \ + *status = LZMA_STATUS_NOT_FINISHED; \ + return SZ_ERROR_DATA; // for strict mode + // return SZ_OK; // for relaxed mode + + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + (*srcLen) = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + + if (p->remainLen > kMatchSpecLenStart) + { + if (p->remainLen > kMatchSpecLenStart + 2) + return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA; + + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize != 0 && p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); + + if (p->checkDicSize == 0 + && p->processedPos == 0 + && p->code >= kBadRepCode) + return SZ_ERROR_DATA; + + p->range = 0xFFFFFFFF; + p->tempBufSize = 0; + + if (p->remainLen > kMatchSpecLenStart + 1) + { + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + } + + p->remainLen = 0; + } + + for (;;) + { + if (p->remainLen == kMatchSpecLenStart) + { + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + LzmaDec_WriteRem(p, dicLimit); + + { + // (p->remainLen == 0 || p->dicPos == dicLimit) + + int checkEndMarkNow = 0; + + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + RETURN_NOT_FINISHED_FOR_FINISH + } + checkEndMarkNow = 1; + } + + // (p->remainLen == 0) + + if (p->tempBufSize == 0) + { + const Byte *bufLimit; + int dummyProcessed = -1; + + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = src + inSize; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + size_t i; + if (inSize >= LZMA_REQUIRED_INPUT_MAX) + break; + (*srcLen) += inSize; + p->tempBufSize = (unsigned)inSize; + for (i = 0; i < inSize; i++) + p->tempBuf[i] = src[i]; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - src); + if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + unsigned i; + 
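          /* Output limit was reached with LZMA_FINISH_END, but the dummy-decoded
             lookahead symbol cannot be an end marker: keep the bytes the lookahead
             examined in tempBuf, count them into *srcLen, and report the strict-mode
             result (status = LZMA_STATUS_NOT_FINISHED). */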
(*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN_NOT_FINISHED_FOR_FINISH + } + + bufLimit = src; + // we will decode only one iteration + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + + p->buf = src; + + { + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) + { + if (processed > inSize) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + continue; + } + + { + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. + // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. + + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN_NOT_FINISHED_FOR_FINISH + } + } + + p->buf = p->tempBuf; + + { + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + } + } + } + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; +} + + + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, 
&inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->probs); + p->probs = NULL; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->dic); + p->dic = NULL; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = (Byte)(d % 9); + d /= 9; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); + if (!p->probs) + return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); + if (!p->dic) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzmaDec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + LzmaDec_CONSTRUCT(&p) + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)) + p.dic = dest; + p.dicBufSize = outSize; + LzmaDec_Init(&p); + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + 
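  /* Only the probability arrays were allocated by this one-call interface;
     p.dic points at the caller's dest buffer, so LzmaDec_FreeProbs() (and not
     LzmaDec_Free()) is the right cleanup here. */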
LzmaDec_FreeProbs(&p, alloc); + return res; +} diff --git a/src/core/fex/7z_C/LzmaDec.h b/src/core/fex/7z_C/LzmaDec.h index 4fd3e6a9..6432f608 100644 --- a/src/core/fex/7z_C/LzmaDec.h +++ b/src/core/fex/7z_C/LzmaDec.h @@ -1,226 +1,237 @@ -/* LzmaDec.h -- LZMA Decoder -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_DEC_H -#define __LZMA_DEC_H - -#include "Types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, - but memory usage for CLzmaDec::probs will be doubled in that case */ - -#ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 -#else -#define CLzmaProb UInt16 -#endif - -/* ---------- LZMA Properties ---------- */ - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaProps { - unsigned lc, lp, pb; - UInt32 dicSize; -} CLzmaProps; - -/* LzmaProps_Decode - decodes properties -Returns: - SZ_OK - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size); - -/* ---------- LZMA Decoder state ---------- */ - -/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. - Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ - -#define LZMA_REQUIRED_INPUT_MAX 20 - -typedef struct -{ - CLzmaProps prop; - CLzmaProb* probs; - Byte* dic; - const Byte* buf; - UInt32 range, code; - SizeT dicPos; - SizeT dicBufSize; - UInt32 processedPos; - UInt32 checkDicSize; - unsigned state; - UInt32 reps[4]; - unsigned remainLen; - int needFlush; - int needInitState; - UInt32 numProbs; - unsigned tempBufSize; - Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; -} CLzmaDec; - -#define LzmaDec_Construct(p) \ - { \ - (p)->dic = 0; \ - (p)->probs = 0; \ - } - -void LzmaDec_Init(CLzmaDec* p); - -/* There are two types of LZMA streams: - 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. - 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ - -typedef enum { - LZMA_FINISH_ANY, /* finish at any point */ - LZMA_FINISH_END /* block must be finished at the end */ -} ELzmaFinishMode; - -/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! - - You must use LZMA_FINISH_END, when you know that current output buffer - covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. - - If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, - and output value of destLen will be less than output buffer size limit. - You can check status result also. - - You can use multiple checks to test data integrity after full decompression: - 1) Check Result and "status" variable. - 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. - 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. - You must use correct finish mode in that case. */ - -typedef enum { - LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ - LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. 
*/ - LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ - LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ -} ELzmaStatus; - -/* ELzmaStatus is used only as output value for function call */ - -/* ---------- Interfaces ---------- */ - -/* There are 3 levels of interfaces: - 1) Dictionary Interface - 2) Buffer Interface - 3) One Call Interface - You can select any of these interfaces, but don't mix functions from different - groups for same object. */ - -/* There are two variants to allocate state for Dictionary Interface: - 1) LzmaDec_Allocate / LzmaDec_Free - 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs - You can use variant 2, if you set dictionary buffer manually. - For Buffer Interface you must always use variant 1. - -LzmaDec_Allocate* can return: - SZ_OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc); -void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc); - -SRes LzmaDec_Allocate(CLzmaDec* state, const Byte* prop, unsigned propsSize, ISzAlloc* alloc); -void LzmaDec_Free(CLzmaDec* state, ISzAlloc* alloc); - -/* ---------- Dictionary Interface ---------- */ - -/* You can use it, if you want to eliminate the overhead for data copying from - dictionary to some other external buffer. - You must work with CLzmaDec variables directly in this interface. - - STEPS: - LzmaDec_Constr() - LzmaDec_Allocate() - for (each new stream) - { - LzmaDec_Init() - while (it needs more decompression) - { - LzmaDec_DecodeToDic() - use data from CLzmaDec::dic and update CLzmaDec::dicPos - } - } - LzmaDec_Free() -*/ - -/* LzmaDec_DecodeToDic - - The decoding to internal dictionary buffer (CLzmaDec::dic). - You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! - -finishMode: - It has meaning only if the decoding reaches output limit (dicLimit). - LZMA_FINISH_ANY - Decode just dicLimit bytes. - LZMA_FINISH_END - Stream must be finished after dicLimit. - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_NEEDS_MORE_INPUT - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error -*/ - -SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); - -/* ---------- Buffer Interface ---------- */ - -/* It's zlib-like interface. - See LzmaDec_DecodeToDic description for information about STEPS and return results, - but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need - to work with CLzmaDec variables manually. - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). -*/ - -SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); - -/* ---------- One Call Interface ---------- */ - -/* LzmaDecode - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). 
- -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties - SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). -*/ - -SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus* status, ISzAlloc* alloc); - -#ifdef __cplusplus -} -#endif - -#endif +/* LzmaDec.h -- LZMA Decoder +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA_DEC_H +#define ZIP7_INC_LZMA_DEC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define Z7_LZMA_PROB32 */ +/* Z7_LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +typedef +#ifdef Z7_LZMA_PROB32 + UInt32 +#else + UInt16 +#endif + CLzmaProb; + + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct +{ + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + /* Don't change this structure. ASM code can use it. */ + CLzmaProps prop; + CLzmaProb *probs; + CLzmaProb *probs_1664; + Byte *dic; + SizeT dicBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; + UInt32 processedPos; + UInt32 checkDicSize; + UInt32 reps[4]; + UInt32 state; + UInt32 remainLen; + + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + +#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p) + +void LzmaDec_Init(CLzmaDec *p); + +/* There are two types of LZMA streams: + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. 
*/ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. + +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: + LzmaDec_Construct() + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! + +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). 
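As an illustration only (the helper names below are not part of this header), a
one-call decoder could be wired up as sketched here. The canonical property bytes
5D 00 00 10 00 decode to lc=3, lp=0, pb=2 with a 1 MiB dictionary, and a
malloc-backed ISzAlloc (layout as in 7zTypes.h) is assumed:

  #include <stdlib.h>
  #include "LzmaDec.h"

  // malloc-based allocator wrappers
  static void *ExampleAlloc(ISzAllocPtr p, size_t size) { (void)p; return malloc(size); }
  static void ExampleFree(ISzAllocPtr p, void *addr)    { (void)p; free(addr); }

  // Decode one whole LZMA stream: props is the 5-byte header, src/srcLen the
  // compressed payload, dest/destLen the output buffer and its capacity.
  static SRes DecodeWhole(Byte *dest, SizeT *destLen,
                          const Byte *props, const Byte *src, SizeT srcLen)
  {
    const ISzAlloc alloc = { ExampleAlloc, ExampleFree };
    ELzmaStatus status;
    SRes res = LzmaDecode(dest, destLen, src, &srcLen,
                          props, LZMA_PROPS_SIZE, LZMA_FINISH_END, &status, &alloc);
    // With LZMA_FINISH_END the stream must end exactly at the output limit,
    // either with an end mark or (for a known-size stream) without one.
    if (res == SZ_OK
        && status != LZMA_STATUS_FINISHED_WITH_MARK
        && status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
      res = SZ_ERROR_DATA;
    return res;
  }

LzmaDecode itself reports the (res, status) combinations listed under Returns just below.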
+ +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure +*/ + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Ppmd.h b/src/core/fex/7z_C/Ppmd.h index 7c7f1f96..da1ed375 100644 --- a/src/core/fex/7z_C/Ppmd.h +++ b/src/core/fex/7z_C/Ppmd.h @@ -1,93 +1,169 @@ -/* Ppmd.h -- PPMD codec common code -2011-01-27 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ - -#ifndef __PPMD_H -#define __PPMD_H - -#include "CpuArch.h" -#include "Types.h" - -EXTERN_C_BEGIN - -#ifdef MY_CPU_32BIT -#define PPMD_32BIT -#endif - -#define PPMD_INT_BITS 7 -#define PPMD_PERIOD_BITS 7 -#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS)) - -#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift)) -#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2) -#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob)) -#define PPMD_UPDATE_PROB_1(prob) ((prob)-PPMD_GET_MEAN(prob)) - -#define PPMD_N1 4 -#define PPMD_N2 4 -#define PPMD_N3 4 -#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4) -#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4) - -#pragma pack(push, 1) -/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */ - -/* SEE-contexts for PPM-contexts with masked symbols */ -typedef struct -{ - UInt16 Summ; /* Freq */ - Byte Shift; /* Speed of Freq change; low Shift is for fast change */ - Byte Count; /* Count to next change of Shift */ -} CPpmd_See; - -#define Ppmd_See_Update(p) \ - if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) { \ - (p)->Summ <<= 1; \ - (p)->Count = (Byte)(3 << (p)->Shift++); \ - } - -typedef struct -{ - Byte Symbol; - Byte Freq; - UInt16 SuccessorLow; - UInt16 SuccessorHigh; -} CPpmd_State; - -#pragma pack(pop) - -typedef -#ifdef PPMD_32BIT - CPpmd_State* -#else - UInt32 -#endif - CPpmd_State_Ref; - -typedef -#ifdef PPMD_32BIT - void* -#else - UInt32 -#endif - CPpmd_Void_Ref; - -typedef -#ifdef PPMD_32BIT - Byte* -#else - UInt32 -#endif - CPpmd_Byte_Ref; - -#define PPMD_SetAllBitsIn256Bytes(p) \ - { \ - unsigned i; \ - for (i = 0; i < 256 / sizeof(p[0]); i += 8) { \ - p[i + 7] = p[i + 6] = p[i + 5] = p[i + 4] = p[i + 3] = p[i + 2] = p[i + 1] = p[i + 0] = ~(size_t)0; \ - } \ - } - -EXTERN_C_END - -#endif +/* Ppmd.h -- PPMD codec common code +2023-03-05 : Igor Pavlov : Public domain +This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ + +#ifndef ZIP7_INC_PPMD_H +#define ZIP7_INC_PPMD_H + +#include "CpuArch.h" + +EXTERN_C_BEGIN + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +/* + PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block. + if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields. 
+ if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields. + if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed, + if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional, + and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit. + PPMD code works slightly faster in (PPMD_32BIT) mode. +*/ + #define PPMD_32BIT +#endif + +#define PPMD_INT_BITS 7 +#define PPMD_PERIOD_BITS 7 +#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS)) + +#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift)) +#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2) +#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob)) +#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob)) + +#define PPMD_N1 4 +#define PPMD_N2 4 +#define PPMD_N3 4 +#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4) +#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4) + +MY_CPU_pragma_pack_push_1 +/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */ + +/* SEE-contexts for PPM-contexts with masked symbols */ +typedef struct +{ + UInt16 Summ; /* Freq */ + Byte Shift; /* Speed of Freq change; low Shift is for fast change */ + Byte Count; /* Count to next change of Shift */ +} CPpmd_See; + +#define Ppmd_See_UPDATE(p) \ + { if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ + { (p)->Summ = (UInt16)((p)->Summ << 1); \ + (p)->Count = (Byte)(3 << (p)->Shift++); }} + + +typedef struct +{ + Byte Symbol; + Byte Freq; + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State; + +typedef struct CPpmd_State2_ +{ + Byte Symbol; + Byte Freq; +} CPpmd_State2; + +typedef struct CPpmd_State4_ +{ + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State4; + +MY_CPU_pragma_pop + +/* + PPMD code can write full CPpmd_State structure data to CPpmd*_Context + at (byte offset = 2) instead of some fields of original CPpmd*_Context structure. + + If we use pointers to different types, but that point to shared + memory space, we can have aliasing problem (strict aliasing). + + XLC compiler in -O2 mode can change the order of memory write instructions + in relation to read instructions, if we have use pointers to different types. + + To solve that aliasing problem we use combined CPpmd*_Context structure + with unions that contain the fields from both structures: + the original CPpmd*_Context and CPpmd_State. + So we can access the fields from both structures via one pointer, + and the compiler doesn't change the order of write instructions + in relation to read instructions. + + If we don't use memory write instructions to shared memory in + some local code, and we use only reading instructions (read only), + then probably it's safe to use pointers to different types for reading. 
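  As a concrete illustration (using the field names that appear in Ppmd7.c later
  in this patch), a one-symbol context is read through the shared unions instead
  of through casts between unrelated pointer types:

    // CPpmd7_Context *c : one-symbol context
    Byte   sym  = c->Union2.State2.Symbol;       // overlays CPpmd_State::Symbol
    Byte   freq = c->Union2.State2.Freq;         // overlays CPpmd_State::Freq
    UInt16 su0  = c->Union4.State4.Successor_0;  // overlays CPpmd_State::Successor_0
    // for a multi-symbol context the same storage is read as
    //   c->Union2.SummFreq and c->Union4.Stats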
+*/ + + + +#ifdef PPMD_32BIT + + #define Ppmd_Ref_Type(type) type * + #define Ppmd_GetRef(p, ptr) (ptr) + #define Ppmd_GetPtr(p, ptr) (ptr) + #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr) + +#else + + #define Ppmd_Ref_Type(type) UInt32 + #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) + #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs)) + +#endif // PPMD_32BIT + + +typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref; +typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref; +typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref; + + +/* +#ifdef MY_CPU_LE_UNALIGN +// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache. +#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0) +#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v) + +#else +*/ + +/* + We can write 16-bit halves to 32-bit (Successor) field in any selected order. + But the native order is more consistent way. + So we use the native order, if LE/BE order can be detected here at compile time. +*/ + +#ifdef MY_CPU_BE + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); } + +#else + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); } + +#endif + +// #endif + + +#define PPMD_SetAllBitsIn256Bytes(p) \ + { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \ + p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }} + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Ppmd7.c b/src/core/fex/7z_C/Ppmd7.c index 1b90efc2..29ff621c 100644 --- a/src/core/fex/7z_C/Ppmd7.c +++ b/src/core/fex/7z_C/Ppmd7.c @@ -1,667 +1,1131 @@ -/* Ppmd7.c -- PPMdH codec -2010-03-12 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ - -#include - -#include "Ppmd7.h" - -const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; -static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051 }; - -#define MAX_FREQ 124 -#define UNIT_SIZE 12 - -#define U2B(nu) ((UInt32)(nu)*UNIT_SIZE) -#define U2I(nu) (p->Units2Indx[(nu)-1]) -#define I2U(indx) (p->Indx2Units[indx]) - -#ifdef PPMD_32BIT -#define REF(ptr) (ptr) -#else -#define REF(ptr) ((UInt32)((Byte*)(ptr) - (p)->Base)) -#endif - -#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) - -#define CTX(ref) ((CPpmd7_Context*)Ppmd7_GetContext(p, ref)) -#define STATS(ctx) Ppmd7_GetStats(p, ctx) -#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx) -#define SUFFIX(ctx) CTX((ctx)->Suffix) - -typedef CPpmd7_Context* CTX_PTR; - -struct CPpmd7_Node_; - -typedef -#ifdef PPMD_32BIT - struct CPpmd7_Node_* -#else - UInt32 -#endif - CPpmd7_Node_Ref; - -typedef struct CPpmd7_Node_ { - UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. 
Stamp=0 means free */ - UInt16 NU; - CPpmd7_Node_Ref Next; /* must be at offset >= 4 */ - CPpmd7_Node_Ref Prev; -} CPpmd7_Node; - -#ifdef PPMD_32BIT -#define NODE(ptr) (ptr) -#else -#define NODE(offs) ((CPpmd7_Node*)(p->Base + (offs))) -#endif - -void Ppmd7_Construct(CPpmd7* p) -{ - unsigned i, k, m; - - p->Base = 0; - - for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) { - unsigned step = (i >= 12 ? 4 : (i >> 2) + 1); - do { - p->Units2Indx[k++] = (Byte)i; - } while (--step); - p->Indx2Units[i] = (Byte)k; - } - - p->NS2BSIndx[0] = (0 << 1); - p->NS2BSIndx[1] = (1 << 1); - memset(p->NS2BSIndx + 2, (2 << 1), 9); - memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); - - for (i = 0; i < 3; i++) - p->NS2Indx[i] = (Byte)i; - for (m = i, k = 1; i < 256; i++) { - p->NS2Indx[i] = (Byte)m; - if (--k == 0) - k = (++m) - 2; - } - - memset(p->HB2Flag, 0, 0x40); - memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40); -} - -void Ppmd7_Free(CPpmd7* p, ISzAlloc* alloc) -{ - alloc->Free(alloc, p->Base); - p->Size = 0; - p->Base = 0; -} - -Bool Ppmd7_Alloc(CPpmd7* p, UInt32 size, ISzAlloc* alloc) -{ - if (p->Base == 0 || p->Size != size) { - Ppmd7_Free(p, alloc); - p->AlignOffset = -#ifdef PPMD_32BIT - (4 - size) & 3; -#else - 4 - (size & 3); -#endif - if ((p->Base = (Byte*)alloc->Alloc(alloc, p->AlignOffset + size -#ifndef PPMD_32BIT - + UNIT_SIZE -#endif - )) - == 0) - return False; - p->Size = size; - } - return True; -} - -static void InsertNode(CPpmd7* p, void* node, unsigned indx) -{ - *((CPpmd_Void_Ref*)node) = p->FreeList[indx]; - p->FreeList[indx] = REF(node); -} - -static void* RemoveNode(CPpmd7* p, unsigned indx) -{ - CPpmd_Void_Ref* node = (CPpmd_Void_Ref*)Ppmd7_GetPtr(p, p->FreeList[indx]); - p->FreeList[indx] = *node; - return node; -} - -static void SplitBlock(CPpmd7* p, void* ptr, unsigned oldIndx, unsigned newIndx) -{ - unsigned i, nu = I2U(oldIndx) - I2U(newIndx); - ptr = (Byte*)ptr + U2B(I2U(newIndx)); - if (I2U(i = U2I(nu)) != nu) { - unsigned k = I2U(--i); - InsertNode(p, ((Byte*)ptr) + U2B(k), nu - k - 1); - } - InsertNode(p, ptr, i); -} - -static void GlueFreeBlocks(CPpmd7* p) -{ -#ifdef PPMD_32BIT - CPpmd7_Node headItem; - CPpmd7_Node_Ref head = &headItem; -#else - CPpmd7_Node_Ref head = p->AlignOffset + p->Size; -#endif - - CPpmd7_Node_Ref n = head; - unsigned i; - - p->GlueCount = 255; - - /* create doubly-linked list of free blocks */ - for (i = 0; i < PPMD_NUM_INDEXES; i++) { - UInt16 nu = I2U(i); - CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; - p->FreeList[i] = 0; - while (next != 0) { - CPpmd7_Node* node = NODE(next); - node->Next = n; - n = NODE(n)->Prev = next; - next = *(const CPpmd7_Node_Ref*)node; - node->Stamp = 0; - node->NU = (UInt16)nu; - } - } - NODE(head) - ->Stamp - = 1; - NODE(head) - ->Next - = n; - NODE(n) - ->Prev - = head; - if (p->LoUnit != p->HiUnit) - ((CPpmd7_Node*)p->LoUnit)->Stamp = 1; - - /* Glue free blocks */ - while (n != head) { - CPpmd7_Node* node = NODE(n); - UInt32 nu = (UInt32)node->NU; - for (;;) { - CPpmd7_Node* node2 = NODE(n) + nu; - nu += node2->NU; - if (node2->Stamp != 0 || nu >= 0x10000) - break; - NODE(node2->Prev) - ->Next - = node2->Next; - NODE(node2->Next) - ->Prev - = node2->Prev; - node->NU = (UInt16)nu; - } - n = node->Next; - } - - /* Fill lists of free blocks */ - for (n = NODE(head)->Next; n != head;) { - CPpmd7_Node* node = NODE(n); - unsigned nu; - CPpmd7_Node_Ref next = node->Next; - for (nu = node->NU; nu > 128; nu -= 128, node += 128) - InsertNode(p, node, PPMD_NUM_INDEXES - 1); - if (I2U(i = U2I(nu)) != nu) { - unsigned k = 
I2U(--i); - InsertNode(p, node + k, nu - k - 1); - } - InsertNode(p, node, i); - n = next; - } -} - -static void* AllocUnitsRare(CPpmd7* p, unsigned indx) -{ - unsigned i; - void* retVal; - if (p->GlueCount == 0) { - GlueFreeBlocks(p); - if (p->FreeList[indx] != 0) - return RemoveNode(p, indx); - } - i = indx; - do { - if (++i == PPMD_NUM_INDEXES) { - UInt32 numBytes = U2B(I2U(indx)); - p->GlueCount--; - return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL); - } - } while (p->FreeList[i] == 0); - retVal = RemoveNode(p, i); - SplitBlock(p, retVal, i, indx); - return retVal; -} - -static void* AllocUnits(CPpmd7* p, unsigned indx) -{ - UInt32 numBytes; - if (p->FreeList[indx] != 0) - return RemoveNode(p, indx); - numBytes = U2B(I2U(indx)); - if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit)) { - void* retVal = p->LoUnit; - p->LoUnit += numBytes; - return retVal; - } - return AllocUnitsRare(p, indx); -} - -#define MyMem12Cpy(dest, src, num) \ - { \ - UInt32* d = (UInt32*)dest; \ - const UInt32* s = (const UInt32*)src; \ - UInt32 n = num; \ - do { \ - d[0] = s[0]; \ - d[1] = s[1]; \ - d[2] = s[2]; \ - s += 3; \ - d += 3; \ - } while (--n); \ - } - -static void* ShrinkUnits(CPpmd7* p, void* oldPtr, unsigned oldNU, unsigned newNU) -{ - unsigned i0 = U2I(oldNU); - unsigned i1 = U2I(newNU); - if (i0 == i1) - return oldPtr; - if (p->FreeList[i1] != 0) { - void* ptr = RemoveNode(p, i1); - MyMem12Cpy(ptr, oldPtr, newNU); - InsertNode(p, oldPtr, i0); - return ptr; - } - SplitBlock(p, oldPtr, i0, i1); - return oldPtr; -} - -#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16))) - -static void SetSuccessor(CPpmd_State* p, CPpmd_Void_Ref v) -{ - (p)->SuccessorLow = (UInt16)((UInt32)(v)&0xFFFF); - (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF); -} - -static void RestartModel(CPpmd7* p) -{ - unsigned i, k, m; - - memset(p->FreeList, 0, sizeof(p->FreeList)); - p->Text = p->Base + p->AlignOffset; - p->HiUnit = p->Text + p->Size; - p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; - p->GlueCount = 0; - - p->OrderFall = p->MaxOrder; - p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? 
p->MaxOrder : 12) - 1; - p->PrevSuccess = 0; - - p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ - p->MinContext->Suffix = 0; - p->MinContext->NumStats = 256; - p->MinContext->SummFreq = 256 + 1; - p->FoundState = (CPpmd_State*)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ - p->LoUnit += U2B(256 / 2); - p->MinContext->Stats = REF(p->FoundState); - for (i = 0; i < 256; i++) { - CPpmd_State* s = &p->FoundState[i]; - s->Symbol = (Byte)i; - s->Freq = 1; - SetSuccessor(s, 0); - } - - for (i = 0; i < 128; i++) - for (k = 0; k < 8; k++) { - UInt16* dest = p->BinSumm[i] + k; - UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2)); - for (m = 0; m < 64; m += 8) - dest[m] = val; - } - - for (i = 0; i < 25; i++) - for (k = 0; k < 16; k++) { - CPpmd_See* s = &p->See[i][k]; - s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4)); - s->Count = 4; - } -} - -void Ppmd7_Init(CPpmd7* p, unsigned maxOrder) -{ - p->MaxOrder = maxOrder; - RestartModel(p); - p->DummySee.Shift = PPMD_PERIOD_BITS; - p->DummySee.Summ = 0; /* unused */ - p->DummySee.Count = 64; /* unused */ -} - -static CTX_PTR CreateSuccessors(CPpmd7* p, Bool skip) -{ - CPpmd_State upState; - CTX_PTR c = p->MinContext; - CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); - CPpmd_State* ps[PPMD7_MAX_ORDER]; - unsigned numPs = 0; - - if (!skip) - ps[numPs++] = p->FoundState; - - while (c->Suffix) { - CPpmd_Void_Ref successor; - CPpmd_State* s; - c = SUFFIX(c); - if (c->NumStats != 1) { - for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++) - ; - } else - s = ONE_STATE(c); - successor = SUCCESSOR(s); - if (successor != upBranch) { - c = CTX(successor); - if (numPs == 0) - return c; - break; - } - ps[numPs++] = s; - } - - upState.Symbol = *(const Byte*)Ppmd7_GetPtr(p, upBranch); - SetSuccessor(&upState, upBranch + 1); - - if (c->NumStats == 1) - upState.Freq = ONE_STATE(c)->Freq; - else { - UInt32 cf, s0; - CPpmd_State* s; - for (s = STATS(c); s->Symbol != upState.Symbol; s++) - ; - cf = s->Freq - 1; - s0 = c->SummFreq - c->NumStats - cf; - upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? 
(5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0)))); - } - - do { - /* Create Child */ - CTX_PTR c1; /* = AllocContext(p); */ - if (p->HiUnit != p->LoUnit) - c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); - else if (p->FreeList[0] != 0) - c1 = (CTX_PTR)RemoveNode(p, 0); - else { - c1 = (CTX_PTR)AllocUnitsRare(p, 0); - if (!c1) - return NULL; - } - c1->NumStats = 1; - *ONE_STATE(c1) = upState; - c1->Suffix = REF(c); - SetSuccessor(ps[--numPs], REF(c1)); - c = c1; - } while (numPs != 0); - - return c; -} - -static void SwapStates(CPpmd_State* t1, CPpmd_State* t2) -{ - CPpmd_State tmp = *t1; - *t1 = *t2; - *t2 = tmp; -} - -static void UpdateModel(CPpmd7* p) -{ - CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState); - CTX_PTR c; - unsigned s0, ns; - - if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) { - c = SUFFIX(p->MinContext); - - if (c->NumStats == 1) { - CPpmd_State* s = ONE_STATE(c); - if (s->Freq < 32) - s->Freq++; - } else { - CPpmd_State* s = STATS(c); - if (s->Symbol != p->FoundState->Symbol) { - do { - s++; - } while (s->Symbol != p->FoundState->Symbol); - if (s[0].Freq >= s[-1].Freq) { - SwapStates(&s[0], &s[-1]); - s--; - } - } - if (s->Freq < MAX_FREQ - 9) { - s->Freq += 2; - c->SummFreq += 2; - } - } - } - - if (p->OrderFall == 0) { - p->MinContext = p->MaxContext = CreateSuccessors(p, True); - if (p->MinContext == 0) { - RestartModel(p); - return; - } - SetSuccessor(p->FoundState, REF(p->MinContext)); - return; - } - - *p->Text++ = p->FoundState->Symbol; - successor = REF(p->Text); - if (p->Text >= p->UnitsStart) { - RestartModel(p); - return; - } - - if (fSuccessor) { - if (fSuccessor <= successor) { - CTX_PTR cs = CreateSuccessors(p, False); - if (cs == NULL) { - RestartModel(p); - return; - } - fSuccessor = REF(cs); - } - if (--p->OrderFall == 0) { - successor = fSuccessor; - p->Text -= (p->MaxContext != p->MinContext); - } - } else { - SetSuccessor(p->FoundState, successor); - fSuccessor = REF(p->MinContext); - } - - s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1); - - for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c)) { - unsigned ns1; - UInt32 cf, sf; - if ((ns1 = c->NumStats) != 1) { - if ((ns1 & 1) == 0) { - /* Expand for one UNIT */ - unsigned oldNU = ns1 >> 1; - unsigned i = U2I(oldNU); - if (i != U2I(oldNU + 1)) { - void* ptr = AllocUnits(p, i + 1); - void* oldPtr; - if (!ptr) { - RestartModel(p); - return; - } - oldPtr = STATS(c); - MyMem12Cpy(ptr, oldPtr, oldNU); - InsertNode(p, oldPtr, i); - c->Stats = STATS_REF(ptr); - } - } - c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1))); - } else { - CPpmd_State* s = (CPpmd_State*)AllocUnits(p, 0); - if (!s) { - RestartModel(p); - return; - } - *s = *ONE_STATE(c); - c->Stats = REF(s); - if (s->Freq < MAX_FREQ / 4 - 1) - s->Freq <<= 1; - else - s->Freq = MAX_FREQ - 4; - c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3)); - } - cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6); - sf = (UInt32)s0 + c->SummFreq; - if (cf < 6 * sf) { - cf = 1 + (cf > sf) + (cf >= 4 * sf); - c->SummFreq += 3; - } else { - cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); - c->SummFreq = (UInt16)(c->SummFreq + cf); - } - { - CPpmd_State* s = STATS(c) + ns1; - SetSuccessor(s, successor); - s->Symbol = p->FoundState->Symbol; - s->Freq = (Byte)cf; - c->NumStats = (UInt16)(ns1 + 1); - } - } - p->MaxContext = p->MinContext = CTX(fSuccessor); -} - -static void Rescale(CPpmd7* p) -{ - unsigned i, adder, 
sumFreq, escFreq; - CPpmd_State* stats = STATS(p->MinContext); - CPpmd_State* s = p->FoundState; - { - CPpmd_State tmp = *s; - for (; s != stats; s--) - s[0] = s[-1]; - *s = tmp; - } - escFreq = p->MinContext->SummFreq - s->Freq; - s->Freq += 4; - adder = (p->OrderFall != 0); - s->Freq = (Byte)((s->Freq + adder) >> 1); - sumFreq = s->Freq; - - i = p->MinContext->NumStats - 1; - do { - escFreq -= (++s)->Freq; - s->Freq = (Byte)((s->Freq + adder) >> 1); - sumFreq += s->Freq; - if (s[0].Freq > s[-1].Freq) { - CPpmd_State* s1 = s; - CPpmd_State tmp = *s1; - do - s1[0] = s1[-1]; - while (--s1 != stats && tmp.Freq > s1[-1].Freq); - *s1 = tmp; - } - } while (--i); - - if (s->Freq == 0) { - unsigned numStats = p->MinContext->NumStats; - unsigned n0, n1; - do { - i++; - } while ((--s)->Freq == 0); - escFreq += i; - p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i); - if (p->MinContext->NumStats == 1) { - CPpmd_State tmp = *stats; - do { - tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1)); - escFreq >>= 1; - } while (escFreq > 1); - InsertNode(p, stats, U2I(((numStats + 1) >> 1))); - *(p->FoundState = ONE_STATE(p->MinContext)) = tmp; - return; - } - n0 = (numStats + 1) >> 1; - n1 = (p->MinContext->NumStats + 1) >> 1; - if (n0 != n1) - p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); - } - p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); - p->FoundState = STATS(p->MinContext); -} - -CPpmd_See* Ppmd7_MakeEscFreq(CPpmd7* p, unsigned numMasked, UInt32* escFreq) -{ - CPpmd_See* see; - unsigned nonMasked = p->MinContext->NumStats - numMasked; - if (p->MinContext->NumStats != 256) { - see = p->See[p->NS2Indx[nonMasked - 1]] + (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) + 2 * (p->MinContext->SummFreq < 11 * p->MinContext->NumStats) + 4 * (numMasked > nonMasked) + p->HiBitsFlag; - { - unsigned r = (see->Summ >> see->Shift); - see->Summ = (UInt16)(see->Summ - r); - *escFreq = r + (r == 0); - } - } else { - see = &p->DummySee; - *escFreq = 1; - } - return see; -} - -static void NextContext(CPpmd7* p) -{ - CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); - if (p->OrderFall == 0 && (Byte*)c > p->Text) - p->MinContext = p->MaxContext = c; - else - UpdateModel(p); -} - -void Ppmd7_Update1(CPpmd7* p) -{ - CPpmd_State* s = p->FoundState; - s->Freq += 4; - p->MinContext->SummFreq += 4; - if (s[0].Freq > s[-1].Freq) { - SwapStates(&s[0], &s[-1]); - p->FoundState = --s; - if (s->Freq > MAX_FREQ) - Rescale(p); - } - NextContext(p); -} - -void Ppmd7_Update1_0(CPpmd7* p) -{ - p->PrevSuccess = (2 * p->FoundState->Freq > p->MinContext->SummFreq); - p->RunLength += p->PrevSuccess; - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) - Rescale(p); - NextContext(p); -} - -void Ppmd7_UpdateBin(CPpmd7* p) -{ - p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 128 ? 1 : 0)); - p->PrevSuccess = 1; - p->RunLength++; - NextContext(p); -} - -void Ppmd7_Update2(CPpmd7* p) -{ - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) - Rescale(p); - p->RunLength = p->InitRL; - UpdateModel(p); -} +/* Ppmd7.c -- PPMdH codec +2023-09-07 : Igor Pavlov : Public domain +This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ + +#include "Precomp.h" + +#include + +#include "Ppmd7.h" + +/* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. 
code */ +// #define PPMD7_ORDER_0_SUPPPORT + +MY_ALIGN(16) +static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) +static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; + +#define MAX_FREQ 124 +#define UNIT_SIZE 12 + +#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) +#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) +#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx]) + +#define REF(ptr) Ppmd_GetRef(p, ptr) + +#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +#define STATS(ctx) Ppmd7_GetStats(p, ctx) +#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx) +#define SUFFIX(ctx) CTX((ctx)->Suffix) + +typedef CPpmd7_Context * PPMD7_CTX_PTR; + +struct CPpmd7_Node_; + +typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref; + +typedef struct CPpmd7_Node_ +{ + UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */ + UInt16 NU; + CPpmd7_Node_Ref Next; /* must be at offset >= 4 */ + CPpmd7_Node_Ref Prev; +} CPpmd7_Node; + +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node) + +void Ppmd7_Construct(CPpmd7 *p) +{ + unsigned i, k, m; + + p->Base = NULL; + + for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) + { + unsigned step = (i >= 12 ? 4 : (i >> 2) + 1); + do { p->Units2Indx[k++] = (Byte)i; } while (--step); + p->Indx2Units[i] = (Byte)k; + } + + p->NS2BSIndx[0] = (0 << 1); + p->NS2BSIndx[1] = (1 << 1); + memset(p->NS2BSIndx + 2, (2 << 1), 9); + memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); + + for (i = 0; i < 3; i++) + p->NS2Indx[i] = (Byte)i; + + for (m = i, k = 1; i < 256; i++) + { + p->NS2Indx[i] = (Byte)m; + if (--k == 0) + k = (++m) - 2; + } + + memcpy(p->ExpEscape, PPMD7_kExpEscape, 16); +} + + +void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Base); + p->Size = 0; + p->Base = NULL; +} + + +BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc) +{ + if (!p->Base || p->Size != size) + { + Ppmd7_Free(p, alloc); + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) + return False; + p->Size = size; + } + return True; +} + + + +// ---------- Internal Memory Allocator ---------- + +/* We can use CPpmd7_Node in list of free units (as in Ppmd8) + But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks(). 
+ So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode() +*/ + +#define EMPTY_NODE 0 + + +static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx) +{ + *((CPpmd_Void_Ref *)node) = p->FreeList[indx]; + // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; + + p->FreeList[indx] = REF(node); + +} + + +static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx) +{ + CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); + p->FreeList[indx] = *node; + // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]); + // p->FreeList[indx] = node->Next; + return node; +} + + +static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +{ + unsigned i, nu = I2U(oldIndx) - I2U(newIndx); + ptr = (Byte *)ptr + U2B(I2U(newIndx)); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + } + Ppmd7_InsertNode(p, ptr, i); +} + + +/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ + +typedef union +{ + CPpmd7_Node Node; + CPpmd7_Node_Ref NextRef; +} CPpmd7_Node_Union; + +/* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks() + we use single linked list similar to Ppmd8 code */ + + +static void Ppmd7_GlueFreeBlocks(CPpmd7 *p) +{ + /* + we use first UInt16 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0 + CPpmd7_Context { UInt16 NumStats; : NumStats != 0 + CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record + : Stamp == 1 for head record and guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record. + */ + CPpmd7_Node_Ref head, n = 0; + + p->GlueCount = 255; + + + /* we set guard NODE at LoUnit */ + if (p->LoUnit != p->HiUnit) + ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1; + + { + /* Create list of free blocks. + We still need one additional list walk pass before Glue. 
*/ + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + const UInt16 nu = I2U_UInt16(i); + CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + /* Don't change the order of the following commands: */ + CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next); + const CPpmd7_Node_Ref tmp = next; + next = un->NextRef; + un->Node.Stamp = EMPTY_NODE; + un->Node.NU = nu; + un->Node.Next = n; + n = tmp; + } + } + } + + head = n; + /* Glue and Fill must walk the list in same direction */ + { + /* Glue free blocks */ + CPpmd7_Node_Ref *prev = &head; + while (n) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + n = node->Next; + if (nu == 0) + { + *prev = n; + continue; + } + prev = &node->Next; + for (;;) + { + CPpmd7_Node *node2 = node + nu; + nu += node2->NU; + if (node2->Stamp != EMPTY_NODE || nu >= 0x10000) + break; + node->NU = (UInt16)nu; + node2->NU = 0; + } + } + } + + /* Fill lists of free blocks */ + for (n = head; n != 0;) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) + Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1); + } + Ppmd7_InsertNode(p, node, i); + } +} + + +Z7_NO_INLINE +static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx) +{ + unsigned i; + + if (p->GlueCount == 0) + { + Ppmd7_GlueFreeBlocks(p); + if (p->FreeList[indx] != 0) + return Ppmd7_RemoveNode(p, indx); + } + + i = indx; + + do + { + if (++i == PPMD_NUM_INDEXES) + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; + p->GlueCount--; + return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL; + } + } + while (p->FreeList[i] == 0); + + { + void *block = Ppmd7_RemoveNode(p, i); + Ppmd7_SplitBlock(p, block, i, indx); + return block; + } +} + + +static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx) +{ + if (p->FreeList[indx] != 0) + return Ppmd7_RemoveNode(p, indx); + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } + } + return Ppmd7_AllocUnitsRare(p, indx); +} + + +#define MEM_12_CPY(dest, src, num) \ + { UInt32 *d = (UInt32 *)(dest); \ + const UInt32 *z = (const UInt32 *)(src); \ + unsigned n = (num); \ + do { \ + d[0] = z[0]; \ + d[1] = z[1]; \ + d[2] = z[2]; \ + z += 3; \ + d += 3; \ + } while (--n); \ + } + + +/* +static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU) +{ + unsigned i0 = U2I(oldNU); + unsigned i1 = U2I(newNU); + if (i0 == i1) + return oldPtr; + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd7_RemoveNode(p, i1); + MEM_12_CPY(ptr, oldPtr, newNU) + Ppmd7_InsertNode(p, oldPtr, i0); + return ptr; + } + Ppmd7_SplitBlock(p, oldPtr, i0, i1); + return oldPtr; +} +*/ + + +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) +{ + Ppmd_SET_SUCCESSOR(p, v) +} + + + +Z7_NO_INLINE +static +void Ppmd7_RestartModel(CPpmd7 *p) +{ + unsigned i, k; + + memset(p->FreeList, 0, sizeof(p->FreeList)); + + p->Text = p->Base + p->AlignOffset; + p->HiUnit = p->Text + p->Size; + p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; + p->GlueCount = 0; + + p->OrderFall = p->MaxOrder; + p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? 
p->MaxOrder : 12) - 1; + p->PrevSuccess = 0; + + { + CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + + mc->NumStats = 256; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + SetSuccessor(s, 0); + } + + #ifdef PPMD7_ORDER_0_SUPPPORT + if (p->MaxOrder == 0) + { + CPpmd_Void_Ref r = REF(mc); + s = p->FoundState; + for (i = 0; i < 256; i++, s++) + SetSuccessor(s, r); + return; + } + #endif + } + + for (i = 0; i < 128; i++) + + + + for (k = 0; k < 8; k++) + { + unsigned m; + UInt16 *dest = p->BinSumm[i] + k; + const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2)); + for (m = 0; m < 64; m += 8) + dest[m] = val; + } + + + for (i = 0; i < 25; i++) + { + + CPpmd_See *s = p->See[i]; + + + + unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 16; k++, s++) + { + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); + s->Count = 4; + } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ +} + + +void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) +{ + p->MaxOrder = maxOrder; + + Ppmd7_RestartModel(p); +} + + + +/* + Ppmd7_CreateSuccessors() + It's called when (FoundState->Successor) is RAW-Successor, + that is the link to position in Raw text. + So we create Context records and write the links to + FoundState->Successor and to identical RAW-Successors in suffix + contexts of MinContex. + + The function returns: + if (OrderFall == 0) then MinContext is already at MAX order, + { return pointer to new or existing context of same MAX order } + else + { return pointer to new real context that will be (Order+1) in comparison with MinContext + + also it can return pointer to real context of same order, +*/ + +Z7_NO_INLINE +static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p) +{ + PPMD7_CTX_PTR c = p->MinContext; + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + Byte newSym, newFreq; + unsigned numPs = 0; + CPpmd_State *ps[PPMD7_MAX_ORDER]; + + if (p->OrderFall != 0) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + + + if (c->NumStats != 1) + { + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + + } + else + { + s = ONE_STATE(c); + + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + // (c) is real record Context here, + c = CTX(successor); + if (numPs == 0) + { + // (c) is real record MAX Order Context here, + // So we don't need to create any new contexts. + return c; + } + break; + } + ps[numPs++] = s; + } + + // All created contexts will have single-symbol with new RAW-Successor + // All new RAW-Successors will point to next position in RAW text + // after FoundState->Successor + + newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch); + upBranch++; + + + if (c->NumStats == 1) + newFreq = ONE_STATE(c)->Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + cf - is frequency of symbol that will be Successor in new context records. 
+ s0 - is commulative frequency sum of another symbols from parent context. + max(newFreq)= (s->Freq + 1), when (s0 == 1) + we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[] + so (s->Freq < 128) - is requirement for multi-symbol contexts + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1)); + } + + // Create new single-symbol contexts from low order to high order in loop + + do + { + PPMD7_CTX_PTR c1; + /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0); + else + { + c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + + c1->NumStats = 1; + ONE_STATE(c1)->Symbol = newSym; + ONE_STATE(c1)->Freq = newFreq; + SetSuccessor(ONE_STATE(c1), upBranch); + c1->Suffix = REF(c); + SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + + + +#define SWAP_STATES(s) \ + { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } + + +void Ppmd7_UpdateModel(CPpmd7 *p); +Z7_NO_INLINE +void Ppmd7_UpdateModel(CPpmd7 *p) +{ + CPpmd_Void_Ref maxSuccessor, minSuccessor; + PPMD7_CTX_PTR c, mc; + unsigned s0, ns; + + + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + /* Update Freqs in Suffix Context */ + + c = SUFFIX(p->MinContext); + + if (c->NumStats == 1) + { + CPpmd_State *s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + CPpmd_State *s = STATS(c); + Byte sym = p->FoundState->Symbol; + + if (s->Symbol != sym) + { + do + { + // s++; if (s->Symbol == sym) break; + s++; + } + while (s->Symbol != sym); + + if (s[0].Freq >= s[-1].Freq) + { + SWAP_STATES(s) + s--; + } + } + + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + } + + + if (p->OrderFall == 0) + { + /* MAX ORDER context */ + /* (FoundState->Successor) is RAW-Successor. */ + p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p); + if (!p->MinContext) + { + Ppmd7_RestartModel(p); + return; + } + SetSuccessor(p->FoundState, REF(p->MinContext)); + return; + } + + + /* NON-MAX ORDER context */ + + { + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + Ppmd7_RestartModel(p); + return; + } + maxSuccessor = REF(text); + } + + minSuccessor = SUCCESSOR(p->FoundState); + + if (minSuccessor) + { + // there is Successor for FoundState in MinContext. + // So the next context will be one order higher than MinContext. + + if (minSuccessor <= maxSuccessor) + { + // minSuccessor is RAW-Successor. So we will create real contexts records: + PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p); + if (!cs) + { + Ppmd7_RestartModel(p); + return; + } + minSuccessor = REF(cs); + } + + // minSuccessor now is real Context pointer that points to existing (Order+1) context + + if (--p->OrderFall == 0) + { + /* + if we move to MaxOrder context, then minSuccessor will be common Succesor for both: + MinContext that is (MaxOrder - 1) + MaxContext that is (MaxOrder) + so we don't need new RAW-Successor, and we can use real minSuccessor + as succssors for both MinContext and MaxContext. + */ + maxSuccessor = minSuccessor; + + /* + if (MaxContext != MinContext) + { + there was order fall from MaxOrder and we don't need current symbol + to transfer some RAW-Succesors to real contexts. + So we roll back pointer in raw data for one position. 
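+        (the statement just below implements this rollback branch-free:
+        p->Text is decremented by the result of the comparison
+        (p->MaxContext != p->MinContext), which is 1 exactly when
+        the contexts differ)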
+ } + */ + p->Text -= (p->MaxContext != p->MinContext); + } + } + else + { + /* + FoundState has NULL-Successor here. + And only root 0-order context can contain NULL-Successors. + We change Successor in FoundState to RAW-Successor, + And next context will be same 0-order root Context. + */ + SetSuccessor(p->FoundState, maxSuccessor); + minSuccessor = REF(p->MinContext); + } + + mc = p->MinContext; + c = p->MaxContext; + + p->MaxContext = p->MinContext = CTX(minSuccessor); + + if (c == mc) + return; + + // s0 : is pure Escape Freq + s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1); + + do + { + unsigned ns1; + UInt32 sum; + + if ((ns1 = c->NumStats) != 1) + { + if ((ns1 & 1) == 0) + { + /* Expand for one UNIT */ + const unsigned oldNU = ns1 >> 1; + const unsigned i = U2I(oldNU); + if (i != U2I((size_t)oldNU + 1)) + { + void *ptr = Ppmd7_AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + Ppmd7_RestartModel(p); + return; + } + oldPtr = STATS(c); + MEM_12_CPY(ptr, oldPtr, oldNU) + Ppmd7_InsertNode(p, oldPtr, i); + c->Union4.Stats = STATS_REF(ptr); + } + } + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 3 here. + total increase of Union2.SummFreq for all symbols is less than 256 here */ + sum += (UInt32)(unsigned)((2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1))); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ + // sum = (UInt16)sum; + } + else + { + // instead of One-symbol context we create 2-symbol context + CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0); + if (!s) + { + Ppmd7_RestartModel(p); + return; + } + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context + s->Freq = (Byte)freq; + // max(InitEsc = PPMD7_kExpEscape[*]) is 25. 
So the max(escapeFreq) is 26 here + sum = (UInt32)(freq + p->InitEsc + (ns > 3)); + } + } + + { + CPpmd_State *s = STATS(c) + ns1; + UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq; + UInt32 sf = (UInt32)s0 + sum; + s->Symbol = p->FoundState->Symbol; + c->NumStats = (UInt16)(ns1 + 1); + SetSuccessor(s, maxSuccessor); + + if (cf < 6 * sf) + { + cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf); + sum += 3; + /* It can add (0, 1, 2) to Escape_Freq */ + } + else + { + cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; + } + c = SUFFIX(c); + } + while (c != mc); +} + + + +Z7_NO_INLINE +static void Ppmd7_Rescale(CPpmd7 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) + { + CPpmd_State tmp = *s; + do + s[0] = s[-1]; + while (--s != stats); + *s = tmp; + } + + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + /* + if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context + if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context + */ + + adder = (p->OrderFall != 0); + + #ifdef PPMD7_ORDER_0_SUPPPORT + adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = (unsigned)p->MinContext->NumStats - 1; + s->Freq = (Byte)sumFreq; + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + CPpmd_State tmp = *s; + CPpmd_State *s1 = s; + do + { + s1[0] = s1[-1]; + } + while (--s1 != stats && freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + /* Remove all items with Freq == 0 */ + CPpmd7_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + /* We increase (escFreq) for the number of removed symbols. 
+ So we will have (0.5) increase for Escape_Freq in avarage per + removed symbol after Escape_Freq halving */ + escFreq += i; + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (UInt16)(numStatsNew); + n0 = (numStats + 1) >> 1; + + if (numStatsNew == 1) + { + /* Create Single-Symbol context */ + unsigned freq = stats->Freq; + + do + { + escFreq >>= 1; + freq = (freq + 1) >> 1; + } + while (escFreq > 1); + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; // (freq <= 260 / 4) + p->FoundState = s; + Ppmd7_InsertNode(p, stats, U2I(n0)); + return; + } + + n1 = (numStatsNew + 1) >> 1; + if (n0 != n1) + { + // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + unsigned i0 = U2I(n0); + unsigned i1 = U2I(n1); + if (i0 != i1) + { + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd7_RemoveNode(p, i1); + p->MinContext->Union4.Stats = STATS_REF(ptr); + MEM_12_CPY(ptr, (const void *)stats, n1) + Ppmd7_InsertNode(p, stats, i0); + } + else + Ppmd7_SplitBlock(p, stats, i0, i1); + } + } + } + { + CPpmd7_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + // Escape_Freq halving here + p->FoundState = STATS(mc); + } +} + + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) +{ + CPpmd_See *see; + const CPpmd7_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 256) + { + unsigned nonMasked = numStats - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats) + + 4 * (unsigned)(numMasked > nonMasked) + + p->HiBitsFlag; + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + const unsigned summ = (UInt16)see->Summ; // & 0xFFFF + const unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + *escFreq = (UInt32)(r + (r == 0)); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + + +static void Ppmd7_NextContext(CPpmd7 *p) +{ + PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); +} + + +void Ppmd7_Update1(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + SWAP_STATES(s) + p->FoundState = --s; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + } + Ppmd7_NextContext(p); +} + + +void Ppmd7_Update1_0(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + CPpmd7_Context *mc = p->MinContext; + unsigned freq = s->Freq; + const unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq > summFreq); + p->RunLength += (Int32)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + Ppmd7_NextContext(p); +} + + +/* +void Ppmd7_UpdateBin(CPpmd7 *p) +{ + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 128)); + p->PrevSuccess = 1; + p->RunLength++; + Ppmd7_NextContext(p); +} +*/ + +void Ppmd7_Update2(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->RunLength = p->InitRL; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + 
Ppmd7_Rescale(p); + Ppmd7_UpdateModel(p); +} + + + +/* +PPMd Memory Map: +{ + [ 0 ] contains subset of original raw text, that is required to create context + records, Some symbols are not written, when max order context was reached + [ Text ] free area + [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records + [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items +[ HiUnit ] CPpmd7_Context records + [ Size ] end of array +} + +These addresses don't cross at any time. +And the following condtions is true for addresses: + (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size) + +Raw text is BYTE--aligned. +the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs. + +Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record. +The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. +The code doesn't free UNITs allocated for CPpmd7_Context records. + +The code calls Ppmd7_RestartModel(), when there is no free memory for allocation. +And Ppmd7_RestartModel() changes the state to orignal start state, with full free block. + + +The code allocates UNITs with the following order: + +Allocation of 1 UNIT for Context record + - from free space (HiUnit) down to (LoUnit) + - from FreeList[0] + - Ppmd7_AllocUnitsRare() + +Ppmd7_AllocUnits() for CPpmd_State vectors: + - from FreeList[i] + - from free space (LoUnit) up to (HiUnit) + - Ppmd7_AllocUnitsRare() + +Ppmd7_AllocUnitsRare() + - if (GlueCount == 0) + { Glue lists, GlueCount = 255, allocate from FreeList[i]] } + - loop for all higher sized FreeList[...] lists + - from (UnitsStart - Text), GlueCount-- + - ERROR + + +Each Record with Context contains the CPpmd_State vector, where each +CPpmd_State contains the link to Successor. +There are 3 types of Successor: + 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored + only in 0-order Root Context Record. + We use 0 value as NULL-Successor + 2) RAW-Successor - the link to position in raw text, + that "RAW-Successor" is being created after first + occurrence of new symbol for some existing context record. + (RAW-Successor > 0). + 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1), + that record is being created when we go via RAW-Successor again. + +For any successors at any time: the following condtions are true for Successor links: +(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor) + + +---------- Symbol Frequency, SummFreq and Range in Range_Coder ---------- + +CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq + +The PPMd code tries to fulfill the condition: + (SummFreq <= (256 * 128 = RC::kBot)) + +We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) +So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. +If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. +SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions. +Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for +max-order context. + +When the PPMd code still break (Total <= RC::Range) condition in range coder, +we have two ways to resolve that problem: + 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. + 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. 
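+
+Worked bound from the numbers above: with 256 symbols at MAX_FREQ = 124,
+Sum(Stats[].Freq) <= 256 * 124 = 31744, and the average reserve of 4 per
+symbol adds at most 256 * 4 = 1024 of Escape_Freq, for a total of
+31744 + 1024 = 32768 = 256 * 128, which is exactly the
+(SummFreq <= RC::kBot) target stated above.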
+*/ + +#undef MAX_FREQ +#undef UNIT_SIZE +#undef U2B +#undef U2I +#undef I2U +#undef I2U_UInt16 +#undef REF +#undef STATS_REF +#undef CTX +#undef STATS +#undef ONE_STATE +#undef SUFFIX +#undef NODE +#undef EMPTY_NODE +#undef MEM_12_CPY +#undef SUCCESSOR +#undef SWAP_STATES diff --git a/src/core/fex/7z_C/Ppmd7.h b/src/core/fex/7z_C/Ppmd7.h index df7d3476..65c22ae9 100644 --- a/src/core/fex/7z_C/Ppmd7.h +++ b/src/core/fex/7z_C/Ppmd7.h @@ -1,132 +1,181 @@ -/* Ppmd7.h -- PPMdH compression codec -2010-03-12 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ - -/* This code supports virtual RangeDecoder and includes the implementation -of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H. -If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */ - -#ifndef __PPMD7_H -#define __PPMD7_H - -#include "Ppmd.h" - -EXTERN_C_BEGIN - -#define PPMD7_MIN_ORDER 2 -#define PPMD7_MAX_ORDER 64 - -#define PPMD7_MIN_MEM_SIZE (1 << 11) -#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3) - -struct CPpmd7_Context_; - -typedef -#ifdef PPMD_32BIT - struct CPpmd7_Context_* -#else - UInt32 -#endif - CPpmd7_Context_Ref; - -typedef struct CPpmd7_Context_ { - UInt16 NumStats; - UInt16 SummFreq; - CPpmd_State_Ref Stats; - CPpmd7_Context_Ref Suffix; -} CPpmd7_Context; - -#define Ppmd7Context_OneState(p) ((CPpmd_State*)&(p)->SummFreq) - -typedef struct -{ - CPpmd7_Context *MinContext, *MaxContext; - CPpmd_State* FoundState; - unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag; - Int32 RunLength, InitRL; /* must be 32-bit at least */ - - UInt32 Size; - UInt32 GlueCount; - Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; - UInt32 AlignOffset; - - Byte Indx2Units[PPMD_NUM_INDEXES]; - Byte Units2Indx[128]; - CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; - Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256]; - CPpmd_See DummySee, See[25][16]; - UInt16 BinSumm[128][64]; -} CPpmd7; - -void Ppmd7_Construct(CPpmd7* p); -Bool Ppmd7_Alloc(CPpmd7* p, UInt32 size, ISzAlloc* alloc); -void Ppmd7_Free(CPpmd7* p, ISzAlloc* alloc); -void Ppmd7_Init(CPpmd7* p, unsigned maxOrder); -#define Ppmd7_WasAllocated(p) ((p)->Base != NULL) - -/* ---------- Internal Functions ---------- */ - -extern const Byte PPMD7_kExpEscape[16]; - -#ifdef PPMD_32BIT -#define Ppmd7_GetPtr(p, ptr) (ptr) -#define Ppmd7_GetContext(p, ptr) (ptr) -#define Ppmd7_GetStats(p, ctx) ((ctx)->Stats) -#else -#define Ppmd7_GetPtr(p, offs) ((void*)((p)->Base + (offs))) -#define Ppmd7_GetContext(p, offs) ((CPpmd7_Context*)Ppmd7_GetPtr((p), (offs))) -#define Ppmd7_GetStats(p, ctx) ((CPpmd_State*)Ppmd7_GetPtr((p), ((ctx)->Stats))) -#endif - -void Ppmd7_Update1(CPpmd7* p); -void Ppmd7_Update1_0(CPpmd7* p); -void Ppmd7_Update2(CPpmd7* p); -void Ppmd7_UpdateBin(CPpmd7* p); - -#define Ppmd7_GetBinSumm(p) \ - &p->BinSumm[Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + p->NS2BSIndx[Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + 2 * p->HB2Flag[Ppmd7Context_OneState(p->MinContext)->Symbol] + ((p->RunLength >> 26) & 0x20)] - -CPpmd_See* Ppmd7_MakeEscFreq(CPpmd7* p, unsigned numMasked, UInt32* scale); - -/* ---------- Decode ---------- */ - -typedef struct -{ - UInt32 (*GetThreshold)(void* p, UInt32 total); - void (*Decode)(void* p, UInt32 start, UInt32 size); - UInt32 (*DecodeBit)(void* p, UInt32 size0); -} IPpmd7_RangeDec; - -typedef struct -{ - IPpmd7_RangeDec p; - UInt32 Range; - UInt32 Code; - 
IByteIn* Stream; -} CPpmd7z_RangeDec; - -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec* p); -Bool Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec* p); -#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) - -int Ppmd7_DecodeSymbol(CPpmd7* p, IPpmd7_RangeDec* rc); - -/* ---------- Encode ---------- */ - -typedef struct -{ - UInt64 Low; - UInt32 Range; - Byte Cache; - UInt64 CacheSize; - IByteOut* Stream; -} CPpmd7z_RangeEnc; - -void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc* p); -void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc* p); - -void Ppmd7_EncodeSymbol(CPpmd7* p, CPpmd7z_RangeEnc* rc, int symbol); - -EXTERN_C_END - -#endif +/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#ifndef ZIP7_INC_PPMD7_H +#define ZIP7_INC_PPMD7_H + +#include "Ppmd.h" + +EXTERN_C_BEGIN + +#define PPMD7_MIN_ORDER 2 +#define PPMD7_MAX_ORDER 64 + +#define PPMD7_MIN_MEM_SIZE (1 << 11) +#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3) + +struct CPpmd7_Context_; + +typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref; + +// MY_CPU_pragma_pack_push_1 + +typedef struct CPpmd7_Context_ +{ + UInt16 NumStats; + + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + + CPpmd7_Context_Ref Suffix; +} CPpmd7_Context; + +// MY_CPU_pragma_pop + +#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + + + + +typedef struct +{ + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteInPtr Stream; +} CPpmd7_RangeDec; + + +typedef struct +{ + UInt32 Range; + Byte Cache; + // Byte _dummy_[3]; + UInt64 Low; + UInt64 CacheSize; + IByteOutPtr Stream; +} CPpmd7z_RangeEnc; + + +typedef struct +{ + CPpmd7_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + + + + + union + { + CPpmd7_RangeDec dec; + CPpmd7z_RangeEnc enc; + } rc; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + + Byte NS2BSIndx[256], NS2Indx[256]; + Byte ExpEscape[16]; + CPpmd_See DummySee, See[25][16]; + UInt16 BinSumm[128][64]; + // int LastSymbol; +} CPpmd7; + + +void Ppmd7_Construct(CPpmd7 *p); +BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc); +void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc); +void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); +#define Ppmd7_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context) +#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) + +void Ppmd7_Update1(CPpmd7 *p); +void Ppmd7_Update1_0(CPpmd7 *p); +void Ppmd7_Update2(CPpmd7 *p); + +#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3)) +#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4)) +// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3)) +// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 
0 : (1 << 4)) + +#define Ppmd7_GetBinSumm(p) \ + &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \ + + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \ + + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ] + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale); + + +/* +We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure: + 1) Ppmd7a_*: original PPMdH + 2) Ppmd7z_*: modified PPMdH with 7z Range Coder +Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH) +*/ + +/* ---------- Decode ---------- */ + +#define PPMD7_SYM_END (-1) +#define PPMD7_SYM_ERROR (-2) + +/* +You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init() + +Ppmd7*_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD7_SYM_END : End of payload marker + -2 : PPMD7_SYM_ERROR : Data error +*/ + +/* Ppmd7a_* : original PPMdH */ +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7a_DecodeSymbol(CPpmd7 *p); + +/* Ppmd7z_* : modified PPMdH with 7z Range Coder */ +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7z_DecodeSymbol(CPpmd7 *p); +// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim); + + +/* ---------- Encode ---------- */ + +void Ppmd7z_Init_RangeEnc(CPpmd7 *p); +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p); +// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol); +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim); + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Ppmd7Dec.c b/src/core/fex/7z_C/Ppmd7Dec.c index 1caed6df..e601feb1 100644 --- a/src/core/fex/7z_C/Ppmd7Dec.c +++ b/src/core/fex/7z_C/Ppmd7Dec.c @@ -1,172 +1,312 @@ -/* Ppmd7Dec.c -- PPMdH Decoder -2010-03-12 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ - -#include "Ppmd7.h" - -#define kTopValue (1 << 24) - -Bool Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec* p) -{ - unsigned _i; - p->Code = 0; - p->Range = 0xFFFFFFFF; - if (p->Stream->Read((void*)p->Stream) != 0) - return False; - for (_i = 0; _i < 4; _i++) - p->Code = (p->Code << 8) | p->Stream->Read((void*)p->Stream); - return (p->Code < 0xFFFFFFFF); -} - -static UInt32 Range_GetThreshold(void* pp, UInt32 total) -{ - CPpmd7z_RangeDec* p = (CPpmd7z_RangeDec*)pp; - return (p->Code) / (p->Range /= total); -} - -static void Range_Normalize(CPpmd7z_RangeDec* p) -{ - if (p->Range < kTopValue) { - p->Code = (p->Code << 8) | p->Stream->Read((void*)p->Stream); - p->Range <<= 8; - if (p->Range < kTopValue) { - p->Code = (p->Code << 8) | p->Stream->Read((void*)p->Stream); - p->Range <<= 8; - } - } -} - -static void Range_Decode(void* pp, UInt32 start, UInt32 size) -{ - CPpmd7z_RangeDec* p = (CPpmd7z_RangeDec*)pp; - p->Code -= start * p->Range; - p->Range *= size; - Range_Normalize(p); -} - -static UInt32 Range_DecodeBit(void* pp, UInt32 size0) -{ - CPpmd7z_RangeDec* p = (CPpmd7z_RangeDec*)pp; - UInt32 newBound = (p->Range >> 14) * size0; - UInt32 symbol; - if (p->Code < newBound) { - symbol = 0; - p->Range = newBound; - } else { - symbol = 1; - p->Code -= newBound; - p->Range -= newBound; - } - Range_Normalize(p); - return symbol; -} - -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec* p) -{ - p->p.GetThreshold = 
Range_GetThreshold; - p->p.Decode = Range_Decode; - p->p.DecodeBit = Range_DecodeBit; -} - -#define MASK(sym) ((signed char*)charMask)[sym] - -int Ppmd7_DecodeSymbol(CPpmd7* p, IPpmd7_RangeDec* rc) -{ - size_t charMask[256 / sizeof(size_t)]; - if (p->MinContext->NumStats != 1) { - CPpmd_State* s = Ppmd7_GetStats(p, p->MinContext); - unsigned _i; - UInt32 count, hiCnt; - if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq)) { - Byte symbol; - rc->Decode(rc, 0, s->Freq); - p->FoundState = s; - symbol = s->Symbol; - Ppmd7_Update1_0(p); - return symbol; - } - p->PrevSuccess = 0; - _i = p->MinContext->NumStats - 1; - do { - if ((hiCnt += (++s)->Freq) > count) { - Byte symbol; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); - p->FoundState = s; - symbol = s->Symbol; - Ppmd7_Update1(p); - return symbol; - } - } while (--_i); - if (count >= p->MinContext->SummFreq) - return -2; - p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]; - rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt); - PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - _i = p->MinContext->NumStats - 1; - do { - MASK((--s)->Symbol) = 0; - } while (--_i); - } else { - UInt16* prob = Ppmd7_GetBinSumm(p); - if (rc->DecodeBit(rc, *prob) == 0) { - Byte symbol; - *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob); - symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; - Ppmd7_UpdateBin(p); - return symbol; - } - *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob); - p->InitEsc = PPMD7_kExpEscape[*prob >> 10]; - PPMD_SetAllBitsIn256Bytes(charMask); - MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; - p->PrevSuccess = 0; - } - for (;;) { - CPpmd_State *ps[256], *s; - UInt32 freqSum, count, hiCnt; - CPpmd_See* see; - unsigned _i, num, numMasked = p->MinContext->NumStats; - do { - p->OrderFall++; - if (!p->MinContext->Suffix) - return -1; - p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix); - } while (p->MinContext->NumStats == numMasked); - hiCnt = 0; - s = Ppmd7_GetStats(p, p->MinContext); - _i = 0; - num = p->MinContext->NumStats - numMasked; - do { - int k = (int)(MASK(s->Symbol)); - hiCnt += (s->Freq & k); - ps[_i] = s++; - _i -= k; - } while (_i != num); - - see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); - freqSum += hiCnt; - count = rc->GetThreshold(rc, freqSum); - - if (count < hiCnt) { - Byte symbol; - CPpmd_State** pps = ps; - for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++) - ; - s = *pps; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); - Ppmd_See_Update(see); - p->FoundState = s; - symbol = s->Symbol; - Ppmd7_Update2(p); - return symbol; - } - if (count >= freqSum) - return -2; - rc->Decode(rc, hiCnt, freqSum - hiCnt); - see->Summ = (UInt16)(see->Summ + freqSum); - do { - MASK(ps[--_i]->Symbol) = 0; - } while (_i != 0); - } -} +/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder +2023-09-07 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTopValue ((UInt32)1 << 24) + + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + if (READ_BYTE(p) != 0) + return False; + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \ + { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8; + +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define 
RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + + + R->Code -= start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd7z_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +// typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((Byte *)charMask)[sym] +// Z7_FORCE_INLINE +// static +int Ppmd7z_DecodeSymbol(CPpmd7 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + const UInt32 summFreq = p->MinContext->Union2.SummFreq; + + + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = (unsigned)p->MinContext->NumStats - 1; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt16 *prob = Ppmd7_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM_1(R) + /* we can use single byte normalization here because of + (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */ + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CPpmd7_Context *c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + + R->Code -= size0; + R->Range -= size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + + CPpmd_See *see; + CPpmd7_Context *mc; + unsigned numMasked; + 
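+    // escape path: walk up the suffix chain until a context has more
+    // symbols than were masked, then decode among the still-unmasked
+    // symbols, with the escape frequency estimated by Ppmd7_MakeEscFreq()
+    // from the CPpmd_See tables.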
RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + const unsigned sym0 = s[0].Symbol; + const unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0))); + hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + + + + + count = RC_GetThreshold(freqSum); + + if (count < hiCnt) + { + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (UInt32)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update2(p); + return sym; + } + + if (count >= freqSum) + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +/* +Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) +{ + int sym = 0; + if (buf != lim) + do + { + sym = Ppmd7z_DecodeSymbol(p); + if (sym < 0) + break; + *buf = (Byte)sym; + } + while (++buf < lim); + p->LastSymbol = sym; + return buf; +} +*/ + +#undef kTopValue +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/src/core/fex/7z_C/Ppmd7aDec.c b/src/core/fex/7z_C/Ppmd7aDec.c new file mode 100644 index 00000000..7f3d1e56 --- /dev/null +++ b/src/core/fex/7z_C/Ppmd7aDec.c @@ -0,0 +1,295 @@ +/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder +2023-09-07 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTop ((UInt32)1 << 24) +#define kBot ((UInt32)1 << 15) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + p->Low = 0; + + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM(p) \ + while ((p->Low ^ (p->Low + p->Range)) < kTop \ + || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \ + p->Code = (p->Code << 8) | READ_BYTE(p); \ + p->Range <<= 8; p->Low <<= 8; } + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + 
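+/*
+  Note on this decoder versus the 7z range decoder in Ppmd7Dec.c: the
+  carryless (Subbotin) coder used here also tracks Low. Ppmd7a_RD_Decode
+  below adds (start * Range) to Low while subtracting it from Code, and
+  RC_NORM above shifts out the top byte while Low and (Low + Range) agree
+  in that byte, handling the underflow case (Range < kBot) by clamping
+  Range to ((0 - Low) & (kBot - 1)) before shifting.
+*/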
+Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7a_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + start *= R->Range; + R->Low += start; + R->Code -= start; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd7a_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((Byte *)charMask)[sym] + + +int Ppmd7a_DecodeSymbol(CPpmd7 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + const UInt32 summFreq = p->MinContext->Union2.SummFreq; + + if (summFreq > R->Range) + return PPMD7_SYM_ERROR; + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = (unsigned)p->MinContext->NumStats - 1; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt16 *prob = Ppmd7_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM(R) + + + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + R->Low += size0; + R->Code -= size0; + R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + + CPpmd_See *see; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = 
Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + const unsigned sym0 = s[0].Symbol; + const unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0))); + hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + + if (freqSum > R->Range) + return PPMD7_SYM_ERROR; + + count = RC_GetThreshold(freqSum); + + if (count < hiCnt) + { + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (UInt32)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update2(p); + return sym; + } + + if (count >= freqSum) + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +#undef kTop +#undef kBot +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/src/core/fex/7z_C/Ppmd8.c b/src/core/fex/7z_C/Ppmd8.c new file mode 100644 index 00000000..2ac11580 --- /dev/null +++ b/src/core/fex/7z_C/Ppmd8.c @@ -0,0 +1,1574 @@ +/* Ppmd8.c -- PPMdI codec +2023-09-07 : Igor Pavlov : Public domain +This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */ + +#include "Precomp.h" + +#include + +#include "Ppmd8.h" + + + + +MY_ALIGN(16) +static const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) +static const UInt16 PPMD8_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; + +#define MAX_FREQ 124 +#define UNIT_SIZE 12 + +#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) +#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) + + +#define REF(ptr) Ppmd_GetRef(p, ptr) + +#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) +#define STATS(ctx) Ppmd8_GetStats(p, ctx) +#define ONE_STATE(ctx) Ppmd8Context_OneState(ctx) +#define SUFFIX(ctx) CTX((ctx)->Suffix) + +typedef CPpmd8_Context * PPMD8_CTX_PTR; + +struct CPpmd8_Node_; + +typedef Ppmd_Ref_Type(struct CPpmd8_Node_) CPpmd8_Node_Ref; + +typedef struct CPpmd8_Node_ +{ + UInt32 Stamp; + + CPpmd8_Node_Ref Next; + UInt32 NU; +} CPpmd8_Node; + +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd8_Node) + +void Ppmd8_Construct(CPpmd8 *p) +{ + unsigned i, k, m; + + p->Base = NULL; + + for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) + { + unsigned step = (i >= 12 ? 
4 : (i >> 2) + 1); + do { p->Units2Indx[k++] = (Byte)i; } while (--step); + p->Indx2Units[i] = (Byte)k; + } + + p->NS2BSIndx[0] = (0 << 1); + p->NS2BSIndx[1] = (1 << 1); + memset(p->NS2BSIndx + 2, (2 << 1), 9); + memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); + + for (i = 0; i < 5; i++) + p->NS2Indx[i] = (Byte)i; + + for (m = i, k = 1; i < 260; i++) + { + p->NS2Indx[i] = (Byte)m; + if (--k == 0) + k = (++m) - 4; + } + + memcpy(p->ExpEscape, PPMD8_kExpEscape, 16); +} + + +void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Base); + p->Size = 0; + p->Base = NULL; +} + + +BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc) +{ + if (!p->Base || p->Size != size) + { + Ppmd8_Free(p, alloc); + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) + return False; + p->Size = size; + } + return True; +} + + + +// ---------- Internal Memory Allocator ---------- + + + + + + +#define EMPTY_NODE 0xFFFFFFFF + + +static void Ppmd8_InsertNode(CPpmd8 *p, void *node, unsigned indx) +{ + ((CPpmd8_Node *)node)->Stamp = EMPTY_NODE; + ((CPpmd8_Node *)node)->Next = (CPpmd8_Node_Ref)p->FreeList[indx]; + ((CPpmd8_Node *)node)->NU = I2U(indx); + p->FreeList[indx] = REF(node); + p->Stamps[indx]++; +} + + +static void *Ppmd8_RemoveNode(CPpmd8 *p, unsigned indx) +{ + CPpmd8_Node *node = NODE((CPpmd8_Node_Ref)p->FreeList[indx]); + p->FreeList[indx] = node->Next; + p->Stamps[indx]--; + + return node; +} + + +static void Ppmd8_SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +{ + unsigned i, nu = I2U(oldIndx) - I2U(newIndx); + ptr = (Byte *)ptr + U2B(I2U(newIndx)); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd8_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + } + Ppmd8_InsertNode(p, ptr, i); +} + + + + + + + + + + + + + + +static void Ppmd8_GlueFreeBlocks(CPpmd8 *p) +{ + /* + we use first UInt32 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0xFF + CPpmd8_Context { Byte NumStats; Byte Flags; UInt16 SummFreq; : Flags != 0xFF ??? 
+ CPpmd8_Node { UInt32 Stamp : Stamp == 0xFFFFFFFF for free record + : Stamp == 0 for guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd8_Context record + */ + CPpmd8_Node_Ref n; + + p->GlueCount = 1 << 13; + memset(p->Stamps, 0, sizeof(p->Stamps)); + + /* we set guard NODE at LoUnit */ + if (p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0; + + { + /* Glue free blocks */ + CPpmd8_Node_Ref *prev = &n; + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + + CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + CPpmd8_Node *node = NODE(next); + UInt32 nu = node->NU; + *prev = next; + next = node->Next; + if (nu != 0) + { + CPpmd8_Node *node2; + prev = &(node->Next); + while ((node2 = node + nu)->Stamp == EMPTY_NODE) + { + nu += node2->NU; + node2->NU = 0; + node->NU = nu; + } + } + } + } + + *prev = 0; + } + + + + + + + + + + + + + + + + + + + + + /* Fill lists of free blocks */ + while (n != 0) + { + CPpmd8_Node *node = NODE(n); + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) + Ppmd8_InsertNode(p, node, PPMD_NUM_INDEXES - 1); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd8_InsertNode(p, node + k, (unsigned)nu - k - 1); + } + Ppmd8_InsertNode(p, node, i); + } +} + + +Z7_NO_INLINE +static void *Ppmd8_AllocUnitsRare(CPpmd8 *p, unsigned indx) +{ + unsigned i; + + if (p->GlueCount == 0) + { + Ppmd8_GlueFreeBlocks(p); + if (p->FreeList[indx] != 0) + return Ppmd8_RemoveNode(p, indx); + } + + i = indx; + + do + { + if (++i == PPMD_NUM_INDEXES) + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; + p->GlueCount--; + return ((UInt32)(us - p->Text) > numBytes) ? 
(p->UnitsStart = us - numBytes) : (NULL); + } + } + while (p->FreeList[i] == 0); + + { + void *block = Ppmd8_RemoveNode(p, i); + Ppmd8_SplitBlock(p, block, i, indx); + return block; + } +} + + +static void *Ppmd8_AllocUnits(CPpmd8 *p, unsigned indx) +{ + if (p->FreeList[indx] != 0) + return Ppmd8_RemoveNode(p, indx); + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } + } + return Ppmd8_AllocUnitsRare(p, indx); +} + + +#define MEM_12_CPY(dest, src, num) \ + { UInt32 *d = (UInt32 *)(dest); \ + const UInt32 *z = (const UInt32 *)(src); \ + unsigned n = (num); \ + do { \ + d[0] = z[0]; \ + d[1] = z[1]; \ + d[2] = z[2]; \ + z += 3; \ + d += 3; \ + } while (--n); \ + } + + + +static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU) +{ + unsigned i0 = U2I(oldNU); + unsigned i1 = U2I(newNU); + if (i0 == i1) + return oldPtr; + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd8_RemoveNode(p, i1); + MEM_12_CPY(ptr, oldPtr, newNU) + Ppmd8_InsertNode(p, oldPtr, i0); + return ptr; + } + Ppmd8_SplitBlock(p, oldPtr, i0, i1); + return oldPtr; +} + + +static void FreeUnits(CPpmd8 *p, void *ptr, unsigned nu) +{ + Ppmd8_InsertNode(p, ptr, U2I(nu)); +} + + +static void SpecialFreeUnit(CPpmd8 *p, void *ptr) +{ + if ((Byte *)ptr != p->UnitsStart) + Ppmd8_InsertNode(p, ptr, 0); + else + { + #ifdef PPMD8_FREEZE_SUPPORT + *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts() */ + #endif + p->UnitsStart += UNIT_SIZE; + } +} + + +/* +static void *MoveUnitsUp(CPpmd8 *p, void *oldPtr, unsigned nu) +{ + unsigned indx = U2I(nu); + void *ptr; + if ((Byte *)oldPtr > p->UnitsStart + (1 << 14) || REF(oldPtr) > p->FreeList[indx]) + return oldPtr; + ptr = Ppmd8_RemoveNode(p, indx); + MEM_12_CPY(ptr, oldPtr, nu) + if ((Byte *)oldPtr != p->UnitsStart) + Ppmd8_InsertNode(p, oldPtr, indx); + else + p->UnitsStart += U2B(I2U(indx)); + return ptr; +} +*/ + +static void ExpandTextArea(CPpmd8 *p) +{ + UInt32 count[PPMD_NUM_INDEXES]; + unsigned i; + + memset(count, 0, sizeof(count)); + if (p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0; + + { + CPpmd8_Node *node = (CPpmd8_Node *)(void *)p->UnitsStart; + while (node->Stamp == EMPTY_NODE) + { + UInt32 nu = node->NU; + node->Stamp = 0; + count[U2I(nu)]++; + node += nu; + } + p->UnitsStart = (Byte *)node; + } + + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + UInt32 cnt = count[i]; + if (cnt == 0) + continue; + { + CPpmd8_Node_Ref *prev = (CPpmd8_Node_Ref *)&p->FreeList[i]; + CPpmd8_Node_Ref n = *prev; + p->Stamps[i] -= cnt; + for (;;) + { + CPpmd8_Node *node = NODE(n); + n = node->Next; + if (node->Stamp != 0) + { + prev = &node->Next; + continue; + } + *prev = n; + if (--cnt == 0) + break; + } + } + } +} + + +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +static void Ppmd8State_SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) +{ + Ppmd_SET_SUCCESSOR(p, v) +} + +#define RESET_TEXT(offs) { p->Text = p->Base + p->AlignOffset + (offs); } + +Z7_NO_INLINE +static +void Ppmd8_RestartModel(CPpmd8 *p) +{ + unsigned i, k, m; + + memset(p->FreeList, 0, sizeof(p->FreeList)); + memset(p->Stamps, 0, sizeof(p->Stamps)); + RESET_TEXT(0) + p->HiUnit = p->Text + p->Size; + p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; + p->GlueCount = 0; + + p->OrderFall = p->MaxOrder; + p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? 
p->MaxOrder : 12) - 1; + p->PrevSuccess = 0; + + { + CPpmd8_Context *mc = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd8_AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + mc->Flags = 0; + mc->NumStats = 256 - 1; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + Ppmd8State_SetSuccessor(s, 0); + } + } + + + + + + + + + + + + + for (i = m = 0; m < 25; m++) + { + while (p->NS2Indx[i] == m) + i++; + for (k = 0; k < 8; k++) + { + unsigned r; + UInt16 *dest = p->BinSumm[m] + k; + const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD8_kInitBinEsc[k] / (i + 1)); + for (r = 0; r < 64; r += 8) + dest[r] = val; + } + } + + for (i = m = 0; m < 24; m++) + { + unsigned summ; + CPpmd_See *s; + while (p->NS2Indx[(size_t)i + 3] == m + 3) + i++; + s = p->See[m]; + summ = ((2 * i + 5) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 32; k++, s++) + { + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); + s->Count = 7; + } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ +} + + +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod) +{ + p->MaxOrder = maxOrder; + p->RestoreMethod = restoreMethod; + Ppmd8_RestartModel(p); +} + + +#define FLAG_RESCALED (1 << 2) +// #define FLAG_SYM_HIGH (1 << 3) +#define FLAG_PREV_HIGH (1 << 4) + +#define HiBits_Prepare(sym) ((unsigned)(sym) + 0xC0) + +#define HiBits_Convert_3(flags) (((flags) >> (8 - 3)) & (1 << 3)) +#define HiBits_Convert_4(flags) (((flags) >> (8 - 4)) & (1 << 4)) + +#define PPMD8_HiBitsFlag_3(sym) HiBits_Convert_3(HiBits_Prepare(sym)) +#define PPMD8_HiBitsFlag_4(sym) HiBits_Convert_4(HiBits_Prepare(sym)) + +// #define PPMD8_HiBitsFlag_3(sym) (0x08 * ((sym) >= 0x40)) +// #define PPMD8_HiBitsFlag_4(sym) (0x10 * ((sym) >= 0x40)) + +/* +Refresh() is called when we remove some symbols (successors) in context. +It increases Escape_Freq for sum of all removed symbols. +*/ + +static void Refresh(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned oldNU, unsigned scale) +{ + unsigned i = ctx->NumStats, escFreq, sumFreq, flags; + CPpmd_State *s = (CPpmd_State *)ShrinkUnits(p, STATS(ctx), oldNU, (i + 2) >> 1); + ctx->Union4.Stats = REF(s); + + // #ifdef PPMD8_FREEZE_SUPPORT + /* + (ctx->Union2.SummFreq >= ((UInt32)1 << 15)) can be in FREEZE mode for some files. + It's not good for range coder. So new versions of support fix: + - original PPMdI code rev.1 + + original PPMdI code rev.2 + - 7-Zip default ((PPMD8_FREEZE_SUPPORT is not defined) + + 7-Zip (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE) + if we use that fixed line, we can lose compatibility with some files created before fix + if we don't use that fixed line, the program can work incorrectly in FREEZE mode in rare case. 
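+    (in the code just below, this is done by OR-ing an extra halving step
+    into (scale) once ctx->Union2.SummFreq has reached (1 << 15), which
+    keeps SummFreq within the limits expected by the range coder)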
+ */ + // if (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE) + { + scale |= (ctx->Union2.SummFreq >= ((UInt32)1 << 15)); + } + // #endif + + + + flags = HiBits_Prepare(s->Symbol); + { + unsigned freq = s->Freq; + escFreq = ctx->Union2.SummFreq - freq; + freq = (freq + scale) >> scale; + sumFreq = freq; + s->Freq = (Byte)freq; + } + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + scale) >> scale; + sumFreq += freq; + s->Freq = (Byte)freq; + flags |= HiBits_Prepare(s->Symbol); + } + while (--i); + + ctx->Union2.SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale)); + ctx->Flags = (Byte)((ctx->Flags & (FLAG_PREV_HIGH + FLAG_RESCALED * scale)) + HiBits_Convert_3(flags)); +} + + +static void SWAP_STATES(CPpmd_State *t1, CPpmd_State *t2) +{ + CPpmd_State tmp = *t1; + *t1 = *t2; + *t2 = tmp; +} + + +/* +CutOff() reduces contexts: + It conversts Successors at MaxOrder to another Contexts to NULL-Successors + It removes RAW-Successors and NULL-Successors that are not Order-0 + and it removes contexts when it has no Successors. + if the (Union4.Stats) is close to (UnitsStart), it moves it up. +*/ + +static CPpmd_Void_Ref CutOff(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order) +{ + int ns = ctx->NumStats; + unsigned nu; + CPpmd_State *stats; + + if (ns == 0) + { + CPpmd_State *s = ONE_STATE(ctx); + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart) + { + if (order < p->MaxOrder) + successor = CutOff(p, CTX(successor), order + 1); + else + successor = 0; + Ppmd8State_SetSuccessor(s, successor); + if (successor || order <= 9) /* O_BOUND */ + return REF(ctx); + } + SpecialFreeUnit(p, ctx); + return 0; + } + + nu = ((unsigned)ns + 2) >> 1; + // ctx->Union4.Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), nu)); + { + unsigned indx = U2I(nu); + stats = STATS(ctx); + + if ((UInt32)((Byte *)stats - p->UnitsStart) <= (1 << 14) + && (CPpmd_Void_Ref)ctx->Union4.Stats <= p->FreeList[indx]) + { + void *ptr = Ppmd8_RemoveNode(p, indx); + ctx->Union4.Stats = STATS_REF(ptr); + MEM_12_CPY(ptr, (const void *)stats, nu) + if ((Byte *)stats != p->UnitsStart) + Ppmd8_InsertNode(p, stats, indx); + else + p->UnitsStart += U2B(I2U(indx)); + stats = ptr; + } + } + + { + CPpmd_State *s = stats + (unsigned)ns; + do + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) < p->UnitsStart) + { + CPpmd_State *s2 = stats + (unsigned)(ns--); + if (order) + { + if (s != s2) + *s = *s2; + } + else + { + SWAP_STATES(s, s2); + Ppmd8State_SetSuccessor(s2, 0); + } + } + else + { + if (order < p->MaxOrder) + Ppmd8State_SetSuccessor(s, CutOff(p, CTX(successor), order + 1)); + else + Ppmd8State_SetSuccessor(s, 0); + } + } + while (--s >= stats); + } + + if (ns != ctx->NumStats && order) + { + if (ns < 0) + { + FreeUnits(p, stats, nu); + SpecialFreeUnit(p, ctx); + return 0; + } + ctx->NumStats = (Byte)ns; + if (ns == 0) + { + const Byte sym = stats->Symbol; + ctx->Flags = (Byte)((ctx->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(sym)); + // *ONE_STATE(ctx) = *stats; + ctx->Union2.State2.Symbol = sym; + ctx->Union2.State2.Freq = (Byte)(((unsigned)stats->Freq + 11) >> 3); + ctx->Union4.State4.Successor_0 = stats->Successor_0; + ctx->Union4.State4.Successor_1 = stats->Successor_1; + FreeUnits(p, stats, nu); + } + else + { + Refresh(p, ctx, nu, ctx->Union2.SummFreq > 16 * (unsigned)ns); + } + } + + return REF(ctx); +} + + + +#ifdef PPMD8_FREEZE_SUPPORT + +/* +RemoveBinContexts() + It conversts Successors at MaxOrder to another Contexts to 
NULL-Successors + It changes RAW-Successors to NULL-Successors + removes Bin Context without Successor, if suffix of that context is also binary. +*/ + +static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order) +{ + if (!ctx->NumStats) + { + CPpmd_State *s = ONE_STATE(ctx); + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder) + successor = RemoveBinContexts(p, CTX(successor), order + 1); + else + successor = 0; + Ppmd8State_SetSuccessor(s, successor); + /* Suffix context can be removed already, since different (high-order) + Successors may refer to same context. So we check Flags == 0xFF (Stamp == EMPTY_NODE) */ + if (!successor && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF)) + { + FreeUnits(p, ctx, 1); + return 0; + } + } + else + { + CPpmd_State *s = STATS(ctx) + ctx->NumStats; + do + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder) + Ppmd8State_SetSuccessor(s, RemoveBinContexts(p, CTX(successor), order + 1)); + else + Ppmd8State_SetSuccessor(s, 0); + } + while (--s >= STATS(ctx)); + } + + return REF(ctx); +} + +#endif + + + +static UInt32 GetUsedMemory(const CPpmd8 *p) +{ + UInt32 v = 0; + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + v += p->Stamps[i] * I2U(i); + return p->Size - (UInt32)(p->HiUnit - p->LoUnit) - (UInt32)(p->UnitsStart - p->Text) - U2B(v); +} + +#ifdef PPMD8_FREEZE_SUPPORT + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1, fSuccessor) +#else + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1) +#endif + + +static void RestoreModel(CPpmd8 *p, PPMD8_CTX_PTR ctxError + #ifdef PPMD8_FREEZE_SUPPORT + , PPMD8_CTX_PTR fSuccessor + #endif + ) +{ + PPMD8_CTX_PTR c; + CPpmd_State *s; + RESET_TEXT(0) + + // we go here in cases of error of allocation for context (c1) + // Order(MinContext) < Order(ctxError) <= Order(MaxContext) + + // We remove last symbol from each of contexts [p->MaxContext ... ctxError) contexts + // So we rollback all created (symbols) before error. + for (c = p->MaxContext; c != ctxError; c = SUFFIX(c)) + if (--(c->NumStats) == 0) + { + s = STATS(c); + c->Flags = (Byte)((c->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(s->Symbol)); + // *ONE_STATE(c) = *s; + c->Union2.State2.Symbol = s->Symbol; + c->Union2.State2.Freq = (Byte)(((unsigned)s->Freq + 11) >> 3); + c->Union4.State4.Successor_0 = s->Successor_0; + c->Union4.State4.Successor_1 = s->Successor_1; + + SpecialFreeUnit(p, s); + } + else + { + /* Refresh() can increase Escape_Freq on value of Freq of last symbol, that was added before error. + so the largest possible increase for Escape_Freq is (8) from value before ModelUpoadet() */ + Refresh(p, c, ((unsigned)c->NumStats + 3) >> 1, 0); + } + + // increase Escape Freq for context [ctxError ... p->MinContext) + for (; c != p->MinContext; c = SUFFIX(c)) + if (c->NumStats == 0) + { + // ONE_STATE(c) + c->Union2.State2.Freq = (Byte)(((unsigned)c->Union2.State2.Freq + 1) >> 1); + } + else if ((c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 4)) > 128 + 4 * c->NumStats) + Refresh(p, c, ((unsigned)c->NumStats + 2) >> 1, 1); + + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + p->MaxContext = fSuccessor; + p->GlueCount += !(p->Stamps[1] & 1); // why? 
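+      // the model was frozen by an earlier restore: keep it as-is and roll MaxContext back to fSuccessor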
+ } + else if (p->RestoreMethod == PPMD8_RESTORE_METHOD_FREEZE) + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + RemoveBinContexts(p, p->MaxContext, 0); + // we change the current mode to (PPMD8_RESTORE_METHOD_FREEZE + 1) + p->RestoreMethod = PPMD8_RESTORE_METHOD_FREEZE + 1; + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } + else + #endif + if (p->RestoreMethod == PPMD8_RESTORE_METHOD_RESTART || GetUsedMemory(p) < (p->Size >> 1)) + Ppmd8_RestartModel(p); + else + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + do + { + CutOff(p, p->MaxContext, 0); + ExpandTextArea(p); + } + while (GetUsedMemory(p) > 3 * (p->Size >> 2)); + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } + p->MinContext = p->MaxContext; +} + + + +Z7_NO_INLINE +static PPMD8_CTX_PTR Ppmd8_CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, PPMD8_CTX_PTR c) +{ + + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + Byte newSym, newFreq, flags; + unsigned numPs = 0; + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; /* fixed over Shkarin's code. Maybe it could work without + 1 too. */ + + if (!skip) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + + if (s1) { s = s1; s1 = NULL; } + else if (c->NumStats != 0) + { + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + if (s->Freq < MAX_FREQ - 9) { s->Freq++; c->Union2.SummFreq++; } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (!SUFFIX(c)->NumStats & (s->Freq < 24))); + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + + c = CTX(successor); + if (numPs == 0) + { + + + return c; + } + break; + } + ps[numPs++] = s; + } + + + + + + newSym = *(const Byte *)Ppmd8_GetPtr(p, upBranch); + upBranch++; + flags = (Byte)(PPMD8_HiBitsFlag_4(p->FoundState->Symbol) + PPMD8_HiBitsFlag_3(newSym)); + + if (c->NumStats == 0) + newFreq = c->Union2.State2.Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + + + max(newFreq)= (s->Freq - 1), when (s0 == 1) + + + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((cf + 2 * s0 - 3) / s0))); + } + + + + do + { + PPMD8_CTX_PTR c1; + /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (PPMD8_CTX_PTR)Ppmd8_RemoveNode(p, 0); + else + { + c1 = (PPMD8_CTX_PTR)Ppmd8_AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + c1->Flags = flags; + c1->NumStats = 0; + c1->Union2.State2.Symbol = newSym; + c1->Union2.State2.Freq = newFreq; + Ppmd8State_SetSuccessor(ONE_STATE(c1), upBranch); + c1->Suffix = REF(c); + Ppmd8State_SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + + +static PPMD8_CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, PPMD8_CTX_PTR c) +{ + CPpmd_State *s = NULL; + PPMD8_CTX_PTR c1 = c; + CPpmd_Void_Ref upBranch = REF(p->Text); + + #ifdef PPMD8_FREEZE_SUPPORT + /* The BUG in Shkarin's code was fixed: ps could overflow in CUT_OFF mode. 
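+     (ps[] stores p->FoundState plus the zero-successor states collected while walking the suffix chain below)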
*/ + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; + unsigned numPs = 0; + ps[numPs++] = p->FoundState; + #endif + + Ppmd8State_SetSuccessor(p->FoundState, upBranch); + p->OrderFall++; + + for (;;) + { + if (s1) + { + c = SUFFIX(c); + s = s1; + s1 = NULL; + } + else + { + if (!c->Suffix) + { + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1) + p->OrderFall = 1; + } + #endif + return c; + } + c = SUFFIX(c); + if (c->NumStats) + { + if ((s = STATS(c))->Symbol != p->FoundState->Symbol) + do { s++; } while (s->Symbol != p->FoundState->Symbol); + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (s->Freq < 32)); + } + } + if (SUCCESSOR(s)) + break; + #ifdef PPMD8_FREEZE_SUPPORT + ps[numPs++] = s; + #endif + Ppmd8State_SetSuccessor(s, upBranch); + p->OrderFall++; + } + + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + c = CTX(SUCCESSOR(s)); + do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1) + p->OrderFall = 1; + return c; + } + else + #endif + if (SUCCESSOR(s) <= upBranch) + { + PPMD8_CTX_PTR successor; + CPpmd_State *s2 = p->FoundState; + p->FoundState = s; + + successor = Ppmd8_CreateSuccessors(p, False, NULL, c); + if (!successor) + Ppmd8State_SetSuccessor(s, 0); + else + Ppmd8State_SetSuccessor(s, REF(successor)); + p->FoundState = s2; + } + + { + CPpmd_Void_Ref successor = SUCCESSOR(s); + if (p->OrderFall == 1 && c1 == p->MaxContext) + { + Ppmd8State_SetSuccessor(p->FoundState, successor); + p->Text--; + } + if (successor == 0) + return NULL; + return CTX(successor); + } +} + + + +void Ppmd8_UpdateModel(CPpmd8 *p); +Z7_NO_INLINE +void Ppmd8_UpdateModel(CPpmd8 *p) +{ + CPpmd_Void_Ref maxSuccessor, minSuccessor = SUCCESSOR(p->FoundState); + PPMD8_CTX_PTR c; + unsigned s0, ns, fFreq = p->FoundState->Freq; + Byte flag, fSymbol = p->FoundState->Symbol; + { + CPpmd_State *s = NULL; + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + /* Update Freqs in Suffix Context */ + + c = SUFFIX(p->MinContext); + + if (c->NumStats == 0) + { + s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + Byte sym = p->FoundState->Symbol; + s = STATS(c); + + if (s->Symbol != sym) + { + do + { + + s++; + } + while (s->Symbol != sym); + + if (s[0].Freq >= s[-1].Freq) + { + SWAP_STATES(&s[0], &s[-1]); + s--; + } + } + + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + } + + c = p->MaxContext; + if (p->OrderFall == 0 && minSuccessor) + { + PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, True, s, p->MinContext); + if (!cs) + { + Ppmd8State_SetSuccessor(p->FoundState, 0); + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + Ppmd8State_SetSuccessor(p->FoundState, REF(cs)); + p->MinContext = p->MaxContext = cs; + return; + } + + + + + { + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + RESTORE_MODEL(c, CTX(minSuccessor)); /* check it */ + return; + } + maxSuccessor = REF(text); + } + + if (!minSuccessor) + { + PPMD8_CTX_PTR cs = ReduceOrder(p, s, p->MinContext); + if (!cs) + { + RESTORE_MODEL(c, NULL); + return; + } + minSuccessor = REF(cs); + } + else if ((Byte *)Ppmd8_GetPtr(p, minSuccessor) < p->UnitsStart) + { + 
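+      // minSuccessor is still a RAW successor pointing into the Text area, so materialize the real context chain for it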
PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, False, s, p->MinContext); + if (!cs) + { + RESTORE_MODEL(c, NULL); + return; + } + minSuccessor = REF(cs); + } + + if (--p->OrderFall == 0) + { + maxSuccessor = minSuccessor; + p->Text -= (p->MaxContext != p->MinContext); + } + #ifdef PPMD8_FREEZE_SUPPORT + else if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + maxSuccessor = minSuccessor; + RESET_TEXT(0) + p->OrderFall = 0; + } + #endif + } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flag = (Byte)(PPMD8_HiBitsFlag_3(fSymbol)); + s0 = p->MinContext->Union2.SummFreq - (ns = p->MinContext->NumStats) - fFreq; + + for (; c != p->MinContext; c = SUFFIX(c)) + { + unsigned ns1; + UInt32 sum; + + if ((ns1 = c->NumStats) != 0) + { + if ((ns1 & 1) != 0) + { + /* Expand for one UNIT */ + const unsigned oldNU = (ns1 + 1) >> 1; + const unsigned i = U2I(oldNU); + if (i != U2I((size_t)oldNU + 1)) + { + void *ptr = Ppmd8_AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + oldPtr = STATS(c); + MEM_12_CPY(ptr, oldPtr, oldNU) + Ppmd8_InsertNode(p, oldPtr, i); + c->Union4.Stats = STATS_REF(ptr); + } + } + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 1 here. + an average increase is 1/3 per symbol */ + sum += (UInt32)(unsigned)(3 * ns1 + 1 < ns); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < ???). Do we need to truncate (sum) to 16-bit */ + // sum = (UInt16)sum; + } + else + { + + CPpmd_State *s = (CPpmd_State*)Ppmd8_AllocUnits(p, 0); + if (!s) + { + RESTORE_MODEL(c, CTX(minSuccessor)); + return; + } + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // Ppmd8State_SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. 
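+        // the single embedded state becomes the first entry of a newly allocated Stats[] array (the new symbol is appended after it)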
+ c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + + s->Freq = (Byte)freq; + + sum = (UInt32)(freq + p->InitEsc + (ns > 2)); // Ppmd8 (> 2) + } + } + + { + CPpmd_State *s = STATS(c) + ns1 + 1; + UInt32 cf = 2 * (sum + 6) * (UInt32)fFreq; + UInt32 sf = (UInt32)s0 + sum; + s->Symbol = fSymbol; + c->NumStats = (Byte)(ns1 + 1); + Ppmd8State_SetSuccessor(s, maxSuccessor); + c->Flags |= flag; + if (cf < 6 * sf) + { + cf = (unsigned)1 + (cf > sf) + (cf >= 4 * sf); + sum += 4; + /* It can add (1, 2, 3) to Escape_Freq */ + } + else + { + cf = (unsigned)4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; + } + + } + p->MaxContext = p->MinContext = CTX(minSuccessor); +} + + + +Z7_NO_INLINE +static void Ppmd8_Rescale(CPpmd8 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) + { + CPpmd_State tmp = *s; + do + s[0] = s[-1]; + while (--s != stats); + *s = tmp; + } + + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + + + + + + adder = (p->OrderFall != 0); + + #ifdef PPMD8_FREEZE_SUPPORT + adder |= (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE); + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = p->MinContext->NumStats; + s->Freq = (Byte)sumFreq; + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + CPpmd_State tmp = *s; + CPpmd_State *s1 = s; + do + { + s1[0] = s1[-1]; + } + while (--s1 != stats && freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + /* Remove all items with Freq == 0 */ + CPpmd8_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + + + + escFreq += i; + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (Byte)(numStatsNew); + n0 = (numStats + 2) >> 1; + + if (numStatsNew == 0) + { + + unsigned freq = (2 * (unsigned)stats->Freq + escFreq - 1) / escFreq; + if (freq > MAX_FREQ / 3) + freq = MAX_FREQ / 3; + mc->Flags = (Byte)((mc->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(stats->Symbol)); + + + + + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; + p->FoundState = s; + Ppmd8_InsertNode(p, stats, U2I(n0)); + return; + } + + n1 = (numStatsNew + 2) >> 1; + if (n0 != n1) + mc->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + { + // here we are for max order only. So Ppmd8_MakeEscFreq() doesn't use mc->Flags + // but we still need current (Flags & FLAG_PREV_HIGH), if we will convert context to 1-symbol context later. 
+ /* + unsigned flags = HiBits_Prepare((s = STATS(mc))->Symbol); + i = mc->NumStats; + do { flags |= HiBits_Prepare((++s)->Symbol); } while (--i); + mc->Flags = (Byte)((mc->Flags & ~FLAG_SYM_HIGH) + HiBits_Convert_3(flags)); + */ + } + } + + + + + + + { + CPpmd8_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + mc->Flags |= FLAG_RESCALED; + p->FoundState = STATS(mc); + } +} + + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq) +{ + CPpmd_See *see; + const CPpmd8_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 0xFF) + { + // (3 <= numStats + 2 <= 256) (3 <= NS2Indx[3] and NS2Indx[256] === 26) + see = p->See[(size_t)(unsigned)p->NS2Indx[(size_t)numStats + 2] - 3] + + (mc->Union2.SummFreq > 11 * (numStats + 1)) + + 2 * (unsigned)(2 * numStats < ((unsigned)SUFFIX(mc)->NumStats + numMasked1)) + + mc->Flags; + + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + const unsigned summ = (UInt16)see->Summ; // & 0xFFFF + const unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + *escFreq = (UInt32)(r + (r == 0)); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + + +static void Ppmd8_NextContext(CPpmd8 *p) +{ + PPMD8_CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart) + p->MaxContext = p->MinContext = c; + else + Ppmd8_UpdateModel(p); +} + + +void Ppmd8_Update1(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + SWAP_STATES(s, &s[-1]); + p->FoundState = --s; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + } + Ppmd8_NextContext(p); +} + + +void Ppmd8_Update1_0(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + CPpmd8_Context *mc = p->MinContext; + unsigned freq = s->Freq; + const unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq >= summFreq); // Ppmd8 (>=) + p->RunLength += (Int32)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + Ppmd8_NextContext(p); +} + + +/* +void Ppmd8_UpdateBin(CPpmd8 *p) +{ + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196) + p->PrevSuccess = 1; + p->RunLength++; + Ppmd8_NextContext(p); +} +*/ + +void Ppmd8_Update2(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->RunLength = p->InitRL; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd8_Rescale(p); + Ppmd8_UpdateModel(p); +} + +/* H->I changes: + NS2Indx + GlueCount, and Glue method + BinSum + See / EscFreq + Ppmd8_CreateSuccessors updates more suffix contexts + Ppmd8_UpdateModel consts. 
+ PrevSuccess Update + +Flags: + (1 << 2) - the Context was Rescaled + (1 << 3) - there is symbol in Stats with (sym >= 0x40) in + (1 << 4) - main symbol of context is (sym >= 0x40) +*/ + +#undef RESET_TEXT +#undef FLAG_RESCALED +#undef FLAG_PREV_HIGH +#undef HiBits_Prepare +#undef HiBits_Convert_3 +#undef HiBits_Convert_4 +#undef PPMD8_HiBitsFlag_3 +#undef PPMD8_HiBitsFlag_4 +#undef RESTORE_MODEL + +#undef MAX_FREQ +#undef UNIT_SIZE +#undef U2B +#undef U2I +#undef I2U + +#undef REF +#undef STATS_REF +#undef CTX +#undef STATS +#undef ONE_STATE +#undef SUFFIX +#undef NODE +#undef EMPTY_NODE +#undef MEM_12_CPY +#undef SUCCESSOR +#undef SWAP_STATES diff --git a/src/core/fex/7z_C/Ppmd8.h b/src/core/fex/7z_C/Ppmd8.h new file mode 100644 index 00000000..44943b85 --- /dev/null +++ b/src/core/fex/7z_C/Ppmd8.h @@ -0,0 +1,181 @@ +/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#ifndef ZIP7_INC_PPMD8_H +#define ZIP7_INC_PPMD8_H + +#include "Ppmd.h" + +EXTERN_C_BEGIN + +#define PPMD8_MIN_ORDER 2 +#define PPMD8_MAX_ORDER 16 + + + + +struct CPpmd8_Context_; + +typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref; + +// MY_CPU_pragma_pack_push_1 + +typedef struct CPpmd8_Context_ +{ + Byte NumStats; + Byte Flags; + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + + CPpmd8_Context_Ref Suffix; +} CPpmd8_Context; + +// MY_CPU_pragma_pop + +#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + +/* PPMdI code rev.2 contains the fix over PPMdI code rev.1. + But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed + in FREEZE mode. So we disable FREEZE mode support. 
*/ + +// #define PPMD8_FREEZE_SUPPORT + +enum +{ + PPMD8_RESTORE_METHOD_RESTART, + PPMD8_RESTORE_METHOD_CUT_OFF + #ifdef PPMD8_FREEZE_SUPPORT + , PPMD8_RESTORE_METHOD_FREEZE + #endif + , PPMD8_RESTORE_METHOD_UNSUPPPORTED +}; + + + + + + + + +typedef struct +{ + CPpmd8_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + + UInt32 Range; + UInt32 Code; + UInt32 Low; + union + { + IByteInPtr In; + IByteOutPtr Out; + } Stream; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + UInt32 Stamps[PPMD_NUM_INDEXES]; + Byte NS2BSIndx[256], NS2Indx[260]; + Byte ExpEscape[16]; + CPpmd_See DummySee, See[24][32]; + UInt16 BinSumm[25][64]; + +} CPpmd8; + + +void Ppmd8_Construct(CPpmd8 *p); +BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc); +void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc); +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod); +#define Ppmd8_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context) +#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) + +void Ppmd8_Update1(CPpmd8 *p); +void Ppmd8_Update1_0(CPpmd8 *p); +void Ppmd8_Update2(CPpmd8 *p); + + + + + + +#define Ppmd8_GetBinSumm(p) \ + &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \ + + p->MinContext->Flags ] + + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale); + + +/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases, + where the original PPMdI code can give "Divide by Zero" operation. + We use the following fix to allow correct working of encoder and decoder in any cases. 
+ We correct (Escape_Freq) and (_sum_), if (_sum_) is larger than p->Range) */ +#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if (_sum_ > p->Range /* /1 */) _sum_ = p->Range; + + +/* ---------- Decode ---------- */ + +#define PPMD8_SYM_END (-1) +#define PPMD8_SYM_ERROR (-2) + +/* +You must set (CPpmd8::Stream.In) before Ppmd8_RangeDec_Init() + +Ppmd8_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD8_SYM_END : End of payload marker + -2 : PPMD8_SYM_ERROR : Data error +*/ + + +BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p); +#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd8_DecodeSymbol(CPpmd8 *p); + + + + + + + + +/* ---------- Encode ---------- */ + +#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; } +void Ppmd8_Flush_RangeEnc(CPpmd8 *p); +void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol); + + +EXTERN_C_END + +#endif diff --git a/src/core/fex/7z_C/Ppmd8Dec.c b/src/core/fex/7z_C/Ppmd8Dec.c new file mode 100644 index 00000000..944d7d89 --- /dev/null +++ b/src/core/fex/7z_C/Ppmd8Dec.c @@ -0,0 +1,295 @@ +/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder +2023-09-07 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#include "Precomp.h" + +#include "Ppmd8.h" + +#define kTop ((UInt32)1 << 24) +#define kBot ((UInt32)1 << 15) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream.In) + +BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + p->Low = 0; + + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM(p) \ + while ((p->Low ^ (p->Low + p->Range)) < kTop \ + || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \ + p->Code = (p->Code << 8) | READ_BYTE(p); \ + p->Range <<= 8; p->Low <<= 8; } + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R p + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd8_RD_Decode(CPpmd8 *p, UInt32 start, UInt32 size) +{ + start *= R->Range; + R->Low += start; + R->Code -= start; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd8_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) +// typedef CPpmd8_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd8_UpdateModel(CPpmd8 *p); + +#define MASK(sym) ((Byte *)charMask)[sym] + + +int Ppmd8_DecodeSymbol(CPpmd8 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 0) + { + CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + PPMD8_CORRECT_SUM_RANGE(p, summFreq) + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = p->MinContext->NumStats; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD8_SYM_ERROR; 
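+      // no listed symbol matched: decode the escape range and mask out the symbols of this context before falling through to lower orders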
+ + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd8Context_OneState(p->MinContext); + UInt16 *prob = Ppmd8_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM(R) + + + + // sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol; + // Ppmd8_UpdateBin(p); + { + unsigned freq = s->Freq; + CPpmd8_Context *c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 196)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart) + p->MaxContext = p->MinContext = c; + else + Ppmd8_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(rc2, size0); + R->Low += size0; + R->Code -= size0; + R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + UInt32 freqSum2; + CPpmd_See *see; + CPpmd8_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD8_SYM_END; + mc = Ppmd8_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd8_GetStats(p, mc); + + { + unsigned num = (unsigned)mc->NumStats + 1; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + const unsigned sym0 = s[0].Symbol; + const unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0))); + hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + freqSum2 = freqSum; + PPMD8_CORRECT_SUM_RANGE(R, freqSum2) + + + count = RC_GetThreshold(freqSum2); + + if (count < hiCnt) + { + Byte sym; + // Ppmd_See_UPDATE(see) // new (see->Summ) value can overflow over 16-bits in some rare cases + s = Ppmd8_GetStats(p, p->MinContext); + hiCnt = count; + + + { + for (;;) + { + count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd8_Update2(p); + return sym; + } + + if (count >= freqSum2) + return PPMD8_SYM_ERROR; + + RC_Decode(hiCnt, freqSum2 - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. 
+ // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd8_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats + 1; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +#undef kTop +#undef kBot +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/src/core/fex/7z_C/Precomp.h b/src/core/fex/7z_C/Precomp.h new file mode 100644 index 00000000..97652281 --- /dev/null +++ b/src/core/fex/7z_C/Precomp.h @@ -0,0 +1,127 @@ +/* Precomp.h -- precompilation file +2024-01-25 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_PRECOMP_H +#define ZIP7_INC_PRECOMP_H + +/* + this file must be included before another *.h files and before . + this file is included from the following files: + C\*.c + C\Util\*\Precomp.h <- C\Util\*\*.c + CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp + + this file can set the following macros: + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 +*/ + +#include "Compiler.h" + +#ifdef _MSC_VER +// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#if _MSC_VER >= 1912 +// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception. +#endif +#endif + +/* +// for debug: +#define UNICODE 1 +#define _UNICODE 1 +#define _WIN32_WINNT 0x0500 // win2000 +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +*/ + +#ifdef _WIN32 +/* + this "Precomp.h" file must be included before , + if we want to define _WIN32_WINNT before . 
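+   (that is, before <windows.h>: _WIN32_WINNT and WINVER take effect only when they are defined before <windows.h> is included)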
+*/ + +#ifndef Z7_LARGE_PAGES +#ifndef Z7_NO_LARGE_PAGES +#define Z7_LARGE_PAGES 1 +#endif +#endif + +#ifndef Z7_LONG_PATH +#ifndef Z7_NO_LONG_PATH +#define Z7_LONG_PATH 1 +#endif +#endif + +#ifndef Z7_DEVICE_FILE +#ifndef Z7_NO_DEVICE_FILE +// #define Z7_DEVICE_FILE 1 +#endif +#endif + +// we don't change macros if included after +#ifndef _WINDOWS_ + +#ifndef Z7_WIN32_WINNT_MIN + #if defined(_M_ARM64) || defined(__aarch64__) + // #define Z7_WIN32_WINNT_MIN 0x0a00 // win10 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT) + // #define Z7_WIN32_WINNT_MIN 0x0602 // win8 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64) + #define Z7_WIN32_WINNT_MIN 0x0503 // win2003 + // #elif defined(_M_IX86) || defined(__i386__) + // #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + #else // x86 and another(old) systems + #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + // #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug + #endif +#endif // Z7_WIN32_WINNT_MIN + + +#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT +#ifdef _WIN32_WINNT + // #error Stop_Compiling_Bad_WIN32_WINNT +#else + #ifndef Z7_NO_DEFINE_WIN32_WINNT +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define _WIN32_WINNT Z7_WIN32_WINNT_MIN +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #endif +#endif // _WIN32_WINNT + +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT + + +#ifndef _MBCS +#ifndef Z7_NO_UNICODE +// UNICODE and _UNICODE are used by and by 7-zip code. + +#ifndef UNICODE +#define UNICODE 1 +#endif + +#ifndef _UNICODE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _UNICODE 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // Z7_NO_UNICODE +#endif // _MBCS +#endif // _WINDOWS_ + +// #include "7zWindows.h" + +#endif // _WIN32 + +#endif diff --git a/src/core/fex/7z_C/RotateDefs.h b/src/core/fex/7z_C/RotateDefs.h new file mode 100644 index 00000000..8026cd10 --- /dev/null +++ b/src/core/fex/7z_C/RotateDefs.h @@ -0,0 +1,50 @@ +/* RotateDefs.h -- Rotate functions +2023-06-18 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_ROTATE_DEFS_H +#define ZIP7_INC_ROTATE_DEFS_H + +#ifdef _MSC_VER + +#include + +/* don't use _rotl with old MINGW. It can insert slow call to function. */ + +/* #if (_MSC_VER >= 1200) */ +#pragma intrinsic(_rotl) +#pragma intrinsic(_rotr) +/* #endif */ + +#define rotlFixed(x, n) _rotl((x), (n)) +#define rotrFixed(x, n) _rotr((x), (n)) + +#if (_MSC_VER >= 1300) +#define Z7_ROTL64(x, n) _rotl64((x), (n)) +#define Z7_ROTR64(x, n) _rotr64((x), (n)) +#else +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#else + +/* new compilers can translate these macros to fast commands. 
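+   (the (-(n) & 31) and (-(n) & 63) forms below also avoid an undefined full-width shift when n == 0)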
*/ + +#if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5) +/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63))) +#else +/* for old GCC / clang: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#endif + +#endif diff --git a/src/core/fex/7z_C/Types.h b/src/core/fex/7z_C/Types.h deleted file mode 100644 index 30c5ed13..00000000 --- a/src/core/fex/7z_C/Types.h +++ /dev/null @@ -1,256 +0,0 @@ -/* Types.h -- Basic types -2010-10-09 : Igor Pavlov : Public domain */ - -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H - -#include - -#ifdef _WIN32 -#include -#endif - -#ifndef EXTERN_C_BEGIN -#ifdef __cplusplus -#define EXTERN_C_BEGIN extern "C" { -#define EXTERN_C_END } -#else -#define EXTERN_C_BEGIN -#define EXTERN_C_END -#endif -#endif - -EXTERN_C_BEGIN - -#define SZ_OK 0 - -#define SZ_ERROR_DATA 1 -#define SZ_ERROR_MEM 2 -#define SZ_ERROR_CRC 3 -#define SZ_ERROR_UNSUPPORTED 4 -#define SZ_ERROR_PARAM 5 -#define SZ_ERROR_INPUT_EOF 6 -#define SZ_ERROR_OUTPUT_EOF 7 -#define SZ_ERROR_READ 8 -#define SZ_ERROR_WRITE 9 -#define SZ_ERROR_PROGRESS 10 -#define SZ_ERROR_FAIL 11 -#define SZ_ERROR_THREAD 12 - -#define SZ_ERROR_ARCHIVE 16 -#define SZ_ERROR_NO_ARCHIVE 17 - -typedef int SRes; - -#ifdef _WIN32 -typedef DWORD WRes; -#else -typedef int WRes; -#endif - -#ifndef RINOK -#define RINOK(x) \ - { \ - int __result__ = (x); \ - if (__result__ != 0) \ - return __result__; \ - } -#endif - -typedef unsigned char Byte; -typedef short Int16; -typedef unsigned short UInt16; - -#ifdef _LZMA_UINT32_IS_ULONG -typedef long Int32; -typedef unsigned long UInt32; -#else -typedef int Int32; -typedef unsigned int UInt32; -#endif - -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! */ - -typedef long Int64; -typedef unsigned long UInt64; - -#else - -#if defined(_MSC_VER) || defined(__BORLANDC__) -typedef __int64 Int64; -typedef unsigned __int64 UInt64; -#define UINT64_CONST(n) n -#else -typedef long long int Int64; -typedef unsigned long long int UInt64; -#define UINT64_CONST(n) n##ULL -#endif - -#endif - -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; -#else -typedef size_t SizeT; -#endif - -typedef int Bool; -#define True 1 -#define False 0 - -#ifdef _WIN32 -#define MY_STD_CALL __stdcall -#else -#define MY_STD_CALL -#endif - -#ifdef _MSC_VER - -#if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) -#else -#define MY_NO_INLINE -#endif - -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall - -#else - -#define MY_CDECL -#define MY_FAST_CALL - -#endif - -/* The following interfaces use first parameter as pointer to structure */ - -typedef struct -{ - Byte (*Read)(void* p); /* reads one byte, returns 0 in case of EOF or error */ -} IByteIn; - -typedef struct -{ - void (*Write)(void* p, Byte b); -} IByteOut; - -typedef struct -{ - SRes (*Read)(void* p, void* buf, size_t* size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
- (output(*size) < input(*size)) is allowed */ -} ISeqInStream; - -/* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size); -SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf); - -typedef struct -{ - size_t (*Write)(void* p, const void* buf, size_t size); - /* Returns: result - the number of actually written bytes. - (result < size) means error */ -} ISeqOutStream; - -typedef enum { - SZ_SEEK_SET = 0, - SZ_SEEK_CUR = 1, - SZ_SEEK_END = 2 -} ESzSeek; - -typedef struct -{ - SRes (*Read)(void* p, void* buf, size_t* size); /* same as ISeqInStream::Read */ - SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); -} ISeekInStream; - -typedef struct -{ - SRes (*Look)(void* p, const void** buf, size_t* size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) > input(*size)) is not allowed - (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(void* p, size_t offset); - /* offset must be <= output(*size) of Look */ - - SRes (*Read)(void* p, void* buf, size_t* size); - /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); -} ILookInStream; - -SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size); -SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset); - -/* reads via ILookInStream::Read */ -SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType); -SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size); - -#define LookToRead_BUF_SIZE (1 << 14) - -typedef struct -{ - ILookInStream s; - ISeekInStream* realStream; - size_t pos; - size_t size; - Byte buf[LookToRead_BUF_SIZE]; -} CLookToRead; - -void LookToRead_CreateVTable(CLookToRead* p, int lookahead); -void LookToRead_Init(CLookToRead* p); - -typedef struct -{ - ISeqInStream s; - ILookInStream* realStream; -} CSecToLook; - -void SecToLook_CreateVTable(CSecToLook* p); - -typedef struct -{ - ISeqInStream s; - ILookInStream* realStream; -} CSecToRead; - -void SecToRead_CreateVTable(CSecToRead* p); - -typedef struct -{ - SRes (*Progress)(void* p, UInt64 inSize, UInt64 outSize); - /* Returns: result. (result != SZ_OK) means break. - Value (UInt64)(Int64)-1 for size means unknown value. 
*/ -} ICompressProgress; - -typedef struct -{ - void* (*Alloc)(void* p, size_t size); - void (*Free)(void* p, void* address); /* address can be 0 */ -} ISzAlloc; - -#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) -#define IAlloc_Free(p, a) (p)->Free((p), a) - -#ifdef _WIN32 - -#define CHAR_PATH_SEPARATOR '\\' -#define WCHAR_PATH_SEPARATOR L'\\' -#define STRING_PATH_SEPARATOR "\\" -#define WSTRING_PATH_SEPARATOR L"\\" - -#else - -#define CHAR_PATH_SEPARATOR '/' -#define WCHAR_PATH_SEPARATOR L'/' -#define STRING_PATH_SEPARATOR "/" -#define WSTRING_PATH_SEPARATOR L"/" - -#endif - -EXTERN_C_END - -#endif diff --git a/src/core/fex/CMakeLists.txt b/src/core/fex/CMakeLists.txt index f3fcf782..a8e16703 100644 --- a/src/core/fex/CMakeLists.txt +++ b/src/core/fex/CMakeLists.txt @@ -15,7 +15,7 @@ target_sources(vbam-fex 7z_C/7zCrc.h 7z_C/7zCrcOpt.c 7z_C/7zDec.c - 7z_C/7zIn.c + 7z_C/7zArcIn.c 7z_C/7zStream.c 7z_C/Bcj2.c 7z_C/Bcj2.h @@ -24,6 +24,8 @@ target_sources(vbam-fex 7z_C/Bra86.c 7z_C/CpuArch.c 7z_C/CpuArch.h + 7z_C/Delta.c + 7z_C/Delta.h 7z_C/Lzma2Dec.c 7z_C/Lzma2Dec.h 7z_C/LzmaDec.c @@ -32,7 +34,11 @@ target_sources(vbam-fex 7z_C/Ppmd7.c 7z_C/Ppmd7.h 7z_C/Ppmd7Dec.c - 7z_C/Types.h + 7z_C/Ppmd7aDec.c + 7z_C/Ppmd8.c + 7z_C/Ppmd8.h + 7z_C/Ppmd8Dec.c + 7z_C/7zTypes.h fex/Binary_Extractor.cpp fex/Binary_Extractor.h fex/blargg_common.cpp diff --git a/src/core/fex/fex/Zip7_Extractor.cpp b/src/core/fex/fex/Zip7_Extractor.cpp index a4b9f181..d8842e44 100644 --- a/src/core/fex/fex/Zip7_Extractor.cpp +++ b/src/core/fex/fex/Zip7_Extractor.cpp @@ -29,7 +29,7 @@ static ISzAlloc zip7_alloc_temp = { SzAllocTemp, SzFreeTemp }; struct Zip7_Extractor_Impl : ISeekInStream { - CLookToRead look; + CLookToRead2 look; CSzArEx db; // SzExtract state @@ -46,11 +46,11 @@ extern "C" // 7-zip callbacks pass an ISeekInStream* for data, so we must cast it // back to ISeekInStream* FIRST, then cast to our Impl structure - static SRes zip7_read_( void* vstream, void* out, size_t* size ) + static SRes zip7_read_( ISeekInStreamPtr vstream, void* out, size_t* size ) { assert( out && size ); - ISeekInStream* stream = STATIC_CAST(ISeekInStream*,vstream); - Zip7_Extractor_Impl* impl = STATIC_CAST(Zip7_Extractor_Impl*,stream); + ISeekInStreamPtr stream = (ISeekInStreamPtr)vstream; + Zip7_Extractor_Impl* impl = (Zip7_Extractor_Impl*)stream; long lsize = (long)*size; blargg_err_t err = impl->in->read_avail( out, &lsize ); @@ -65,10 +65,10 @@ extern "C" return SZ_OK; } - static SRes zip7_seek_( void* vstream, Int64* pos, ESzSeek mode ) + static SRes zip7_seek_( ISeekInStreamPtr vstream, Int64* pos, ESzSeek mode ) { - ISeekInStream* stream = STATIC_CAST(ISeekInStream*,vstream); - Zip7_Extractor_Impl* impl = STATIC_CAST(Zip7_Extractor_Impl*,stream); + ISeekInStreamPtr stream = (ISeekInStreamPtr)vstream; + Zip7_Extractor_Impl* impl = (Zip7_Extractor_Impl*)stream; // assert( mode != SZ_SEEK_CUR ); // never used @@ -171,16 +171,16 @@ blargg_err_t Zip7_Extractor::open_v() impl->buf = NULL; impl->buf_size = 0; - LookToRead_CreateVTable( &impl->look, false ); + LookToRead2_CreateVTable( &impl->look, false ); impl->ISeekInStream::Read = zip7_read_; impl->ISeekInStream::Seek = zip7_seek_; impl->look.realStream = impl; - LookToRead_Init( &impl->look ); + LookToRead2_INIT( &impl->look ); SzArEx_Init( &impl->db ); impl->in_err = NULL; - RETURN_ERR( zip7_err( SzArEx_Open( &impl->db, &impl->look.s, + RETURN_ERR( zip7_err( SzArEx_Open( &impl->db, &impl->look.vt, &zip7_alloc, &zip7_alloc_temp ) ) ); return seek_arc_v( 0 ); @@ -271,18 +271,17 @@ 
bool Zip7_Extractor::utf16ToUtf8( unsigned char* dest, size_t* destLen, const sh blargg_err_t Zip7_Extractor::next_v() { - while ( ++index < (int) impl->db.db.NumFiles ) + while ( ++index < (int) impl->db.NumFiles ) { - CSzFileItem const& item = impl->db.db.Files [index]; - if ( !item.IsDir ) + if ( !SzArEx_IsDir(&impl->db, index) ) { - if ( item.MTimeDefined ) + if ( SzBitWithVals_Check(&impl->db.MTime, index) ) { const UInt64 epoch = ((UInt64)0x019db1de << 32) + 0xd53e8000; /* 0x019db1ded53e8000ULL: 1970-01-01 00:00:00 (UTC) */ struct tm tm; - UInt64 time = ((UInt64)item.MTime.High << 32) + item.MTime.Low - epoch; + UInt64 time = ((((UInt64)impl->db.MTime.Vals[index].High) << 32) | impl->db.MTime.Vals[index].Low) - epoch; time /= 1000000; time_t _time = time; @@ -310,7 +309,7 @@ blargg_err_t Zip7_Extractor::next_v() name8.resize( utf8_length + 1 ); memcpy( name8.begin(), temp, utf8_length + 1 ); set_name( name8.begin(), name16.begin() ); - set_info( (int)item.Size, 0, (item.CrcDefined ? item.Crc : 0) ); + set_info( (int)SzArEx_GetFileSize(&impl->db, index), 0, (SzBitWithVals_Check(&impl->db.CRCs, index) ? impl->db.CRCs.Vals[index] : 0) ); break; } } @@ -330,7 +329,7 @@ fex_pos_t Zip7_Extractor::tell_arc_v() const blargg_err_t Zip7_Extractor::seek_arc_v( fex_pos_t pos ) { - assert( 0 <= pos && pos <= (int) impl->db.db.NumFiles ); + assert( 0 <= pos && pos <= (int) impl->db.NumFiles ); index = pos - 1; return next_v(); @@ -341,7 +340,7 @@ blargg_err_t Zip7_Extractor::data_v( void const** out ) impl->in_err = NULL; size_t offset = 0; size_t count = 0; - RETURN_ERR( zip7_err( SzArEx_Extract( &impl->db, &impl->look.s, index, + RETURN_ERR( zip7_err( SzArEx_Extract( &impl->db, &impl->look.vt, index, &impl->block_index, &impl->buf, &impl->buf_size, &offset, &count, &zip7_alloc, &zip7_alloc_temp ) ) ); assert( count == (size_t) size() );