diff --git a/src/Common/lzma/7zTypes.h b/src/Common/lzma/7zTypes.h index 5b77420a..8aaabc8f 100644 --- a/src/Common/lzma/7zTypes.h +++ b/src/Common/lzma/7zTypes.h @@ -1,5 +1,5 @@ /* 7zTypes.h -- Basic types -2024-01-24 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_7Z_TYPES_H #define ZIP7_7Z_TYPES_H @@ -46,8 +46,9 @@ typedef int SRes; #ifdef _MSC_VER + #define MY_ALIGN_IN_STRUCT(n) __declspec(align(n)) #if _MSC_VER > 1200 - #define MY_ALIGN(n) __declspec(align(n)) + #define MY_ALIGN(n) MY_ALIGN_IN_STRUCT(n) #else #define MY_ALIGN(n) #endif @@ -58,6 +59,7 @@ typedef int SRes; #define MY_ALIGN(n) alignas(n) */ #define MY_ALIGN(n) __attribute__ ((aligned(n))) + #define MY_ALIGN_IN_STRUCT(n) MY_ALIGN(n) #endif diff --git a/src/Common/lzma/7zWindows.h b/src/Common/lzma/7zWindows.h index 42c6db8b..381159ed 100644 --- a/src/Common/lzma/7zWindows.h +++ b/src/Common/lzma/7zWindows.h @@ -1,11 +1,17 @@ -/* 7zWindows.h -- StdAfx -2023-04-02 : Igor Pavlov : Public domain */ +/* 7zWindows.h -- Windows.h and related code +Igor Pavlov : Public domain */ #ifndef ZIP7_INC_7Z_WINDOWS_H #define ZIP7_INC_7Z_WINDOWS_H #ifdef _WIN32 +#if defined(_MSC_VER) && _MSC_VER >= 1950 && !defined(__clang__) // VS2026 +// and some another windows files need that option +// VS2026: wtypesbase.h: warning C4865: 'tagCLSCTX': the underlying type will change from 'int' to 'unsigned int' when '/Zc:enumTypes' is specified on the command line +#pragma warning(disable : 4865) +#endif + #if defined(__clang__) # pragma clang diagnostic push #endif diff --git a/src/Common/lzma/Alloc.c b/src/Common/lzma/Alloc.c index 63e1a121..419fa375 100644 --- a/src/Common/lzma/Alloc.c +++ b/src/Common/lzma/Alloc.c @@ -1,5 +1,5 @@ /* Alloc.c -- Memory allocation functions -2024-02-18 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -24,8 +24,6 @@ #endif // #define SZ_ALLOC_DEBUG -/* #define SZ_ALLOC_DEBUG */ - /* use SZ_ALLOC_DEBUG to debug alloc/free 
operations */ #ifdef SZ_ALLOC_DEBUG @@ -34,9 +32,10 @@ static int g_allocCount = 0; #ifdef _WIN32 static int g_allocCountMid = 0; +#ifdef Z7_LARGE_PAGES static int g_allocCountBig = 0; #endif - +#endif #define CONVERT_INT_TO_STR(charType, tempSize) \ char temp[tempSize]; unsigned i = 0; \ @@ -140,8 +139,10 @@ static void PrintAddr(void *p) #else #ifdef _WIN32 +#ifdef Z7_LARGE_PAGES #define PRINT_ALLOC(name, cnt, size, ptr) #endif +#endif #define PRINT_FREE(name, cnt, ptr) #define Print(s) #define PrintLn() @@ -245,6 +246,7 @@ void MidFree(void *address) } #ifdef Z7_LARGE_PAGES +// #pragma message("Z7_LARGE_PAGES") #ifdef MEM_LARGE_PAGES #define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES @@ -253,32 +255,14 @@ void MidFree(void *address) #endif extern -SIZE_T g_LargePageSize; -SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); - -void SetLargePageSize(void) -{ - SIZE_T size; -#ifdef Z7_USE_DYN_GetLargePageMinimum -Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION - - const - Func_GetLargePageMinimum fn = - (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), - "GetLargePageMinimum"); - if (!fn) - return; - size = fn(); -#else - size = GetLargePageMinimum(); -#endif - if (size == 0 || (size & (size - 1)) != 0) - return; - g_LargePageSize = size; -} - -#endif // Z7_LARGE_PAGES +size_t g_LargePageSize; +size_t g_LargePageSize = 0; +extern +size_t g_LargePageThresholdMin; +size_t g_LargePageThresholdMin = 0; +extern +UInt32 g_LargePageFlags; +UInt32 g_LargePageFlags = 0; void *BigAlloc(size_t size) { @@ -289,12 +273,10 @@ void *BigAlloc(size_t size) #ifdef Z7_LARGE_PAGES { - SIZE_T ps = g_LargePageSize; - if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) + const size_t ps = g_LargePageSize - 1; + if (ps < (1u << 30) && size > g_LargePageThresholdMin) { - size_t size2; - ps--; - size2 = (size + ps) & ~ps; + const size_t size2 = (size + ps) & ~ps; if (size2 >= size) { void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | 
MY_MEM_LARGE_PAGES, PAGE_READWRITE); @@ -303,6 +285,8 @@ void *BigAlloc(size_t size) PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) return p; } + if (g_LargePageFlags & Z7_LARGE_PAGES_FLAG_FAIL_STOP) + return p; } } } @@ -317,6 +301,7 @@ void BigFree(void *address) MidFree(address); } +#endif // Z7_LARGE_PAGES #endif // _WIN32 @@ -327,9 +312,12 @@ const ISzAlloc g_Alloc = { SzAlloc, SzFree }; #ifdef _WIN32 static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); } static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); } +const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; +#endif + +#if defined(Z7_LARGE_PAGES) static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); } static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); } -const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; #endif @@ -371,10 +359,16 @@ typedef #endif -#if !defined(_WIN32) \ - && (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \ - || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) +#ifndef _WIN32 +#include // for _POSIX_ADVISORY_INFO : for some linux +#if (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \ + || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) \ + || defined(_POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO >= 200112L) \ + || defined(__APPLE__) \ + /* || defined(__linux__) */) #define USE_posix_memalign + // #pragma message("USE_posix_memalign") +#endif #endif #ifndef USE_posix_memalign @@ -488,6 +482,181 @@ static void SzAlignedFree(ISzAllocPtr pp, void *address) #endif } +#ifndef _WIN32 + +#ifdef Z7_LARGE_PAGES + +#if 0 // 1 for debug + #include + #include // for strerror() + #define PRF(x) x +#else + #define PRF(x) +#endif + +#ifdef USE_posix_memalign + /* madvise(): + glibc <= 2.19 : _BSD_SOURCE + glibc > 2.19 : _DEFAULT_SOURCE + */ + /* && (defined(_DEFAULT_SOURCE) || defined(_BSD_SOURCE)) 
*/ +#if 1 && !defined(Z7_NO_MADVISE) && \ + (defined(__linux__) || defined(__unix__) || defined(__APPLE__)) +#include // for madvise +// #pragma message("sys/mman.h") +#if (defined(MADV_HUGEPAGE) && defined(MADV_NOHUGEPAGE)) + #define Z7_USE_BIG_ALLOC_MADVISE + // #pragma message("Z7_USE_BIG_ALLOC_MADVISE") +#endif +#endif +#endif // USE_posix_memalign + +#ifdef Z7_USE_BIG_ALLOC_MADVISE +#define LARGE_PAGE_SIZE_DEFAULT (1 << 21) +#else +#define LARGE_PAGE_SIZE_DEFAULT 0 +#endif + +extern +size_t g_LargePageSize; +size_t g_LargePageSize = LARGE_PAGE_SIZE_DEFAULT; +extern +size_t g_LargePageThresholdMin; +size_t g_LargePageThresholdMin = LARGE_PAGE_SIZE_DEFAULT / 2; +extern +UInt32 g_LargePageFlags; +UInt32 g_LargePageFlags = 0; + +void *BigAlloc(size_t size) +{ + if (size == 0) + return NULL; +#ifdef USE_posix_memalign + { + const size_t pageSize = g_LargePageSize; + void *buf = NULL; // on Linux (and other systems), posix_memalign() does not modify memptr on failure (POSIX.1-2008 TC2). + PRF(printf("\nBigAlloc 0x%08x=%5uMB", (unsigned)(size), (unsigned)(size >> 20));) + if (pageSize && size > g_LargePageThresholdMin) + { + int res; + const size_t mask = pageSize - 1; + /* we can allocate aligned size, so data at the end of buffer also will use huge page + if (size2 for madvise() is not aligned for huge page size) + { Last data block will use small pages. It reduces memory allocation, + but last data block with small pages can work slower. + It's useful, if we have very large HUGE_PAGE: 32MB or 512MB. 
} + */ + size_t size2 = (size + mask) & ~mask; + if (size2 < size || (size & mask) <= g_LargePageThresholdMin) + size2 = size; + res = posix_memalign(&buf, pageSize, size2); + PRF(printf(" posix_memalign size=0x%08x=%5uMB align=%u", + (unsigned)(size2), (unsigned)(size2 >> 20), (unsigned)pageSize);) + PRF(printf(" buf=%p", (void *)buf);) + if (res == 0) + { +#ifdef Z7_USE_BIG_ALLOC_MADVISE + if ((g_LargePageFlags & Z7_LARGE_PAGES_FLAG_NO_MADVISE) == 0) + { + // Advise the kernel to use huge pages for this memory range + // MADV_HUGEPAGE / MADV_NOHUGEPAGE : since Linux 2.6.38 + // madvise() only operates on whole pages, therefore addr must be page-aligned (4KB/8KB/16KB/64KB). + // The value of size is rounded up to a multiple of page size. + PRF(printf(" madvise g_LargePageFlags=%x", (unsigned)g_LargePageFlags);) + res = madvise(buf, size2, (g_LargePageFlags & Z7_LARGE_PAGES_FLAG_NO_HUGEPAGE) ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + if (res) + { + PRF(printf("\nERROR res=%d, errno=%d=%s\n", res, (int)errno, strerror(errno));) + if (g_LargePageFlags & Z7_LARGE_PAGES_FLAG_FAIL_STOP) + { + free(buf); + return NULL; + } + } + } +#endif // Z7_USE_BIG_ALLOC_MADVISE + PRF(printf("\n");) + return buf; + } + PRF(printf("\nERROR res=%d=%s\n", res, strerror(res));) + if (g_LargePageFlags & Z7_LARGE_PAGES_FLAG_FAIL_STOP) + return NULL; + // (res == ENOMEM) "Out of memory" is possible, if pageSize is too big. 
+ // so we do second attempt with smaller alignment + } + } +#endif // !USE_posix_memalign + PRF(printf(" z7_AlignedAlloc size=0x%08x=%5uMB\n", (unsigned)(size), (unsigned)(size >> 20));) + return z7_AlignedAlloc(size); +} + + +void BigFree(void *address) +{ + z7_AlignedFree(address); +} +#endif // Z7_LARGE_PAGES +#endif // !_WIN32 + + +#ifdef Z7_LARGE_PAGES +void z7_LargePage_Set(UInt32 flags, size_t pageSize, size_t threshold) +{ + g_LargePageFlags = flags; + +#ifdef _WIN32 + if ((flags & Z7_LARGE_PAGES_FLAG_USE_HUGEPAGE) == 0) + { + g_LargePageSize = 0; + g_LargePageThresholdMin = 0; + } + else + { + if ((flags & Z7_LARGE_PAGES_FLAG_DIRECT_PAGE_SIZE) == 0) + { +#ifdef Z7_USE_DYN_GetLargePageMinimum + Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION +typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); + const + Func_GetLargePageMinimum fn = + (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetLargePageMinimum"); + if (fn) + pageSize = fn(); + else + pageSize = 0; +#else + pageSize = GetLargePageMinimum(); +#endif + if (pageSize & (pageSize - 1)) + pageSize = 0; + } + g_LargePageSize = pageSize; + if ((flags & Z7_LARGE_PAGES_FLAG_DIRECT_THRESHOLD) == 0) + threshold = pageSize / 2; + g_LargePageThresholdMin = threshold; + } + +#else // !_WIN32 + + if (flags & Z7_LARGE_PAGES_FLAG_NO_PAGECODE) + { + g_LargePageSize = 0; + g_LargePageThresholdMin = 0; + } + else + { + if ((flags & Z7_LARGE_PAGES_FLAG_DIRECT_PAGE_SIZE) == 0) + pageSize = LARGE_PAGE_SIZE_DEFAULT; + g_LargePageSize = pageSize; + if ((flags & Z7_LARGE_PAGES_FLAG_DIRECT_THRESHOLD) == 0) + threshold = pageSize / 2; + g_LargePageThresholdMin = threshold; + } + // PRF(printf("\ng_LargePageSize=%x g_LargePageThresholdMin = %x g_LargePageFlags = %x", (unsigned)g_LargePageSize, (unsigned)g_LargePageThresholdMin, (unsigned)g_LargePageFlags);) +#endif // !_WIN32 +} +#endif // Z7_LARGE_PAGES const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; diff --git 
a/src/Common/lzma/Alloc.h b/src/Common/lzma/Alloc.h index 01bf6b7d..05d3c2ce 100644 --- a/src/Common/lzma/Alloc.h +++ b/src/Common/lzma/Alloc.h @@ -1,5 +1,5 @@ /* Alloc.h -- Memory allocation functions -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_ALLOC_H #define ZIP7_INC_ALLOC_H @@ -25,40 +25,40 @@ void *MyRealloc(void *address, size_t size); void *z7_AlignedAlloc(size_t size); void z7_AlignedFree(void *p); +extern const ISzAlloc g_Alloc; +extern const ISzAlloc g_AlignedAlloc; + #ifdef _WIN32 + void *MidAlloc(size_t size); + void MidFree(void *address); + extern const ISzAlloc g_MidAlloc; +#else + #define MidAlloc(size) z7_AlignedAlloc(size) + #define MidFree(address) z7_AlignedFree(address) + #define g_MidAlloc g_AlignedAlloc +#endif #ifdef Z7_LARGE_PAGES -void SetLargePageSize(void); -#endif -void *MidAlloc(size_t size); -void MidFree(void *address); -void *BigAlloc(size_t size); -void BigFree(void *address); +#define Z7_LARGE_PAGES_FLAG_USE_HUGEPAGE (1 << 0) // PAGE_ALIGNED / MADV_HUGEPAGE +#define Z7_LARGE_PAGES_FLAG_NO_PAGECODE (1 << 1) // no PAGE_ALIGNED / no madvise +#define Z7_LARGE_PAGES_FLAG_NO_MADVISE (1 << 2) // PAGE_ALIGNED / no madvise : for THP=always +#define Z7_LARGE_PAGES_FLAG_NO_HUGEPAGE (1 << 3) // PAGE_ALIGNED / MADV_NOHUGEPAGE +#define Z7_LARGE_PAGES_FLAG_FAIL_STOP (1 << 15) // for benchmarks +#define Z7_LARGE_PAGES_FLAG_DIRECT_PAGE_SIZE (1 << 16) +#define Z7_LARGE_PAGES_FLAG_DIRECT_THRESHOLD (1 << 17) -/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */ +void z7_LargePage_Set(UInt32 flags, size_t pageSize, size_t threshold); + void *BigAlloc(size_t size); + void BigFree(void *address); + extern const ISzAlloc g_BigAlloc; #else - -#define MidAlloc(size) z7_AlignedAlloc(size) -#define MidFree(address) z7_AlignedFree(address) -#define BigAlloc(size) z7_AlignedAlloc(size) -#define BigFree(address) z7_AlignedFree(address) - + #define BigAlloc(size) MidAlloc(size) + #define BigFree(address) MidFree(address) 
+ #define g_BigAlloc g_MidAlloc #endif -extern const ISzAlloc g_Alloc; - -#ifdef _WIN32 -extern const ISzAlloc g_BigAlloc; -extern const ISzAlloc g_MidAlloc; -#else -#define g_BigAlloc g_AlignedAlloc -#define g_MidAlloc g_AlignedAlloc -#endif - -extern const ISzAlloc g_AlignedAlloc; - typedef struct { diff --git a/src/Common/lzma/Compiler.h b/src/Common/lzma/Compiler.h index 2a9c2b7a..a3577b25 100644 --- a/src/Common/lzma/Compiler.h +++ b/src/Common/lzma/Compiler.h @@ -1,5 +1,5 @@ /* Compiler.h : Compiler specific defines and pragmas -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_COMPILER_H #define ZIP7_INC_COMPILER_H @@ -54,6 +54,12 @@ #pragma GCC diagnostic ignored "-Wexcess-padding" #endif +#if defined(Z7_APPLE_CLANG_VERSION) && __clang_major__ >= 21 +// warning: function MyAlloc might be an allocator wrapper +// clang in xcode: clang 21.0.0 +#pragma GCC diagnostic ignored "-Wallocator-wrappers" +#endif + #if __clang_major__ >= 16 #pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" #endif @@ -72,7 +78,7 @@ #endif // __clang__ -#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16 +#if defined(__clang__) && __clang_major__ >= 16 // #pragma GCC diagnostic ignored "-Wcast-function-type-strict" #define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \ _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"") @@ -183,6 +189,16 @@ typedef void (*Z7_void_Function)(void); #define Z7_ATTRIB_NO_VECTORIZE #endif +#if defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1920) + #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE _Pragma("optimize ( \"s\", on )") + #define Z7_PRAGMA_OPTIMIZE_DEFAULT _Pragma("optimize ( \"\", on )") +#else + #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE + #define Z7_PRAGMA_OPTIMIZE_DEFAULT +#endif + + + #if defined(MY_CPU_X86_OR_AMD64) && ( \ defined(__clang__) && (__clang_major__ >= 4) \ || defined(__GNUC__) && (__GNUC__ >= 5)) diff --git a/src/Common/lzma/CpuArch.c b/src/Common/lzma/CpuArch.c 
index 6e02551e..342280d0 100644 --- a/src/Common/lzma/CpuArch.c +++ b/src/Common/lzma/CpuArch.c @@ -859,7 +859,7 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) #define Z7_GETAUXV_AVAILABLE -#else +#elif !defined(__QNXNTO__) // #pragma message("=== is not NEW GLIBC === ") #if defined __has_include #if __has_include () @@ -877,7 +877,7 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #ifdef USE_HWCAP -#if defined(__FreeBSD__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) static unsigned long MY_getauxval(int aux) { unsigned long val; diff --git a/src/Common/lzma/CpuArch.h b/src/Common/lzma/CpuArch.h index a6297ea4..9df7f2c7 100644 --- a/src/Common/lzma/CpuArch.h +++ b/src/Common/lzma/CpuArch.h @@ -31,7 +31,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_NAME "x32" #define MY_CPU_SIZEOF_POINTER 4 #else - #define MY_CPU_NAME "x64" + #if defined(__APX_EGPR__) || defined(__EGPR__) + #define MY_CPU_NAME "x64-apx" + #define MY_CPU_AMD64_APX + #else + #define MY_CPU_NAME "x64" + #endif #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT @@ -47,6 +52,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_SIZEOF_POINTER 4 #endif +#if defined(__SSE2__) \ + || defined(MY_CPU_AMD64) \ + || defined(_M_IX86_FP) && (_M_IX86_FP >= 2) +#define MY_CPU_SSE2 +#endif + #if defined(_M_ARM64) \ || defined(_M_ARM64EC) \ @@ -243,11 +254,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. 
#endif +// _LITTLE_ENDIAN macro can be defined for big-endian platform with some compilers + #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM_LE) \ || defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_IA64_LE) \ - || defined(_LITTLE_ENDIAN) \ || defined(__LITTLE_ENDIAN__) \ || defined(__ARMEL__) \ || defined(__THUMBEL__) \ @@ -571,10 +583,12 @@ problem-4 : performace: #define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) #define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) #define Z7_CONV_NATIVE_TO_BE_32(v) (v) +// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b1) | ((b0) << 8)) #elif defined(MY_CPU_LE) #define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) #define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) #define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b0) | ((b1) << 8)) #else #error Stop_Compiling_Unknown_Endian_CONV #endif @@ -588,8 +602,20 @@ problem-4 : performace: #define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +// gcc and clang for powerpc can transform load byte access to load reverse word access. +// so we can use byte access instead of word access. 
Z7_BSWAP64 can be slow +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_64BIT) +#define GetUi64a(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#else #define GetUi64a(p) GetUi64(p) +#endif + +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) +#define GetUi32a(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#else #define GetUi32a(p) GetUi32(p) +#endif + #define GetUi16a(p) GetUi16(p) #define SetUi32a(p, v) SetUi32(p, v) #define SetUi16a(p, v) SetUi16(p, v) diff --git a/src/Common/lzma/LzFind.c b/src/Common/lzma/LzFind.c index 1ce40464..330bc172 100644 --- a/src/Common/lzma/LzFind.c +++ b/src/Common/lzma/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2024-03-01 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -404,7 +404,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, const unsigned nbMax = (p->numHashBytes == 2 ? 16 : (p->numHashBytes == 3 ? 24 : 32)); - if (numBits > nbMax) + if (numBits >= nbMax) numBits = nbMax; if (numBits >= 32) hs = (UInt32)0 - 1; @@ -416,14 +416,14 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs |= (256 << kLzHash_CrcShift_2) - 1; { const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize); - if (hs > hs2) + if (hs >= hs2) hs = hs2; } hsCur = hs; if (p->expectedDataSize < historySize) { const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize); - if (hsCur > hs2) + if (hsCur >= hs2) hsCur = hs2; } } @@ -434,7 +434,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, if (p->expectedDataSize < historySize) { hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize); - if (hsCur > hs) // is it possible? + if (hsCur >= hs) // is it possible? 
hsCur = hs; } } @@ -598,7 +598,7 @@ void MatchFinder_Init(void *_p) #ifdef MY_CPU_X86_OR_AMD64 #if defined(__clang__) && (__clang_major__ >= 4) \ - || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) #define USE_LZFIND_SATUR_SUB_128 @@ -890,7 +890,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, return d; { const Byte *pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)]; if (pb[maxLen] == cur[maxLen] && *pb == *cur) { UInt32 len = 0; @@ -925,7 +925,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, break; { ptrdiff_t diff; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)]; diff = (ptrdiff_t)0 - (ptrdiff_t)delta; if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) { @@ -972,7 +972,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } cmCheck = (UInt32)(pos - _cyclicBufferSize); - if ((UInt32)pos <= _cyclicBufferSize) + if ((UInt32)pos < _cyclicBufferSize) cmCheck = 0; if (cmCheck < curMatch) @@ -980,7 +980,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt { const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? 
len0 : len1); const UInt32 pair0 = pair[0]; @@ -1039,7 +1039,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const UInt32 cmCheck; cmCheck = (UInt32)(pos - _cyclicBufferSize); - if ((UInt32)pos <= _cyclicBufferSize) + if ((UInt32)pos < _cyclicBufferSize) cmCheck = 0; if (// curMatch >= pos || // failure @@ -1048,7 +1048,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const { const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); if (pb[len] == cur[len]) @@ -1595,7 +1595,7 @@ static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) UInt32 pos = p->pos; \ UInt32 num2 = num; \ /* (p->pos == p->posLimit) is not allowed here !!! */ \ - { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + { const UInt32 rem = p->posLimit - pos; if (num2 >= rem) num2 = rem; } \ num -= num2; \ { const UInt32 cycPos = p->cyclicBufferPos; \ son = p->son + cycPos; \ diff --git a/src/Common/lzma/LzFindMt.c b/src/Common/lzma/LzFindMt.c index ac9d59d0..25fcc465 100644 --- a/src/Common/lzma/LzFindMt.c +++ b/src/Common/lzma/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -82,6 +82,8 @@ extern UInt64 g_NumIters_Bytes; Z7_NO_INLINE static void MtSync_Construct(CMtSync *p) { + p->affinityGroup = -1; + p->affinityInGroup = 0; p->affinity = 0; p->wasCreated = False; p->csWasInitialized = False; @@ -259,6 +261,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void * // return ERROR_TOO_MANY_POSTS; // for debug // return EINVAL; // for debug +#ifdef _WIN32 + if 
(p->affinityGroup >= 0) + wres = Thread_Create_With_Group(&p->thread, startAddress, obj, + (unsigned)(UInt32)p->affinityGroup, (CAffinityMask)p->affinityInGroup); + else +#endif if (p->affinity != 0) wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); else diff --git a/src/Common/lzma/LzFindMt.h b/src/Common/lzma/LzFindMt.h index fcb479da..89984f52 100644 --- a/src/Common/lzma/LzFindMt.h +++ b/src/Common/lzma/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H @@ -12,8 +12,10 @@ EXTERN_C_BEGIN typedef struct { UInt32 numProcessedBlocks; - CThread thread; + Int32 affinityGroup; + UInt64 affinityInGroup; UInt64 affinity; + CThread thread; BoolInt wasCreated; BoolInt needStart; diff --git a/src/Common/lzma/LzmaEnc.c b/src/Common/lzma/LzmaEnc.c index 088b78f8..60f1d21d 100644 --- a/src/Common/lzma/LzmaEnc.c +++ b/src/Common/lzma/LzmaEnc.c @@ -62,7 +62,9 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->numHashOutBits = 0; p->writeEndMark = 0; + p->affinityGroup = -1; p->affinity = 0; + p->affinityInGroup = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) @@ -598,6 +600,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2) p->multiThread = (props.numThreads > 1); p->matchFinderMt.btSync.affinity = p->matchFinderMt.hashSync.affinity = props.affinity; + p->matchFinderMt.btSync.affinityGroup = + p->matchFinderMt.hashSync.affinityGroup = props.affinityGroup; + p->matchFinderMt.btSync.affinityInGroup = + p->matchFinderMt.hashSync.affinityInGroup = props.affinityInGroup; #endif return SZ_OK; @@ -2345,10 +2351,9 @@ static void LzmaEnc_Construct(CLzmaEnc *p) CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) { - void *p; - p = ISzAlloc_Alloc(alloc, 
sizeof(CLzmaEnc)); + CLzmaEncHandle p = (CLzmaEncHandle)ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); if (p) - LzmaEnc_Construct((CLzmaEnc *)p); + LzmaEnc_Construct(p); return p; } diff --git a/src/Common/lzma/LzmaEnc.h b/src/Common/lzma/LzmaEnc.h index 9f8039a1..3feb5b4a 100644 --- a/src/Common/lzma/LzmaEnc.h +++ b/src/Common/lzma/LzmaEnc.h @@ -1,5 +1,5 @@ /* LzmaEnc.h -- LZMA Encoder -2023-04-13 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZMA_ENC_H #define ZIP7_INC_LZMA_ENC_H @@ -29,11 +29,13 @@ typedef struct int numThreads; /* 1 or 2, default = 2 */ // int _pad; + Int32 affinityGroup; UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ UInt64 affinity; + UInt64 affinityInGroup; } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); diff --git a/src/Common/lzma/Precomp.h b/src/Common/lzma/Precomp.h index 7747fdd7..83b720e1 100644 --- a/src/Common/lzma/Precomp.h +++ b/src/Common/lzma/Precomp.h @@ -1,5 +1,5 @@ /* Precomp.h -- precompilation file -2024-01-25 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_PRECOMP_H #define ZIP7_INC_PRECOMP_H @@ -40,18 +40,18 @@ #endif */ +#ifndef Z7_LARGE_PAGES +#if !defined(Z7_NO_LARGE_PAGES) && !defined(UNDER_CE) +#define Z7_LARGE_PAGES 1 +#endif +#endif + #ifdef _WIN32 /* this "Precomp.h" file must be included before , if we want to define _WIN32_WINNT before . 
*/ -#ifndef Z7_LARGE_PAGES -#ifndef Z7_NO_LARGE_PAGES -#define Z7_LARGE_PAGES 1 -#endif -#endif - #ifndef Z7_LONG_PATH #ifndef Z7_NO_LONG_PATH #define Z7_LONG_PATH 1 diff --git a/src/Common/lzma/Threads.c b/src/Common/lzma/Threads.c index 464efeca..abef5dd1 100644 --- a/src/Common/lzma/Threads.c +++ b/src/Common/lzma/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2024-03-28 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -59,6 +59,111 @@ WRes Thread_Wait_Close(CThread *p) return (res != 0 ? res : res2); } +typedef struct MY_PROCESSOR_NUMBER { + WORD Group; + BYTE Number; + BYTE Reserved; +} MY_PROCESSOR_NUMBER, *MY_PPROCESSOR_NUMBER; + +typedef struct MY_GROUP_AFFINITY { +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 100000) + // KAFFINITY is not defined in old mingw + ULONG_PTR +#else + KAFFINITY +#endif + Mask; + WORD Group; + WORD Reserved[3]; +} MY_GROUP_AFFINITY, *MY_PGROUP_AFFINITY; + +typedef BOOL (WINAPI *Func_SetThreadGroupAffinity)( + HANDLE hThread, + CONST MY_GROUP_AFFINITY *GroupAffinity, + MY_PGROUP_AFFINITY PreviousGroupAffinity); + +typedef BOOL (WINAPI *Func_GetThreadGroupAffinity)( + HANDLE hThread, + MY_PGROUP_AFFINITY GroupAffinity); + +typedef BOOL (WINAPI *Func_GetProcessGroupAffinity)( + HANDLE hProcess, + PUSHORT GroupCount, + PUSHORT GroupArray); + +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + +#if 0 +#include +#define PRF(x) x +/* +-- + before call of SetThreadGroupAffinity() + GetProcessGroupAffinity returns one group. + after call of SetThreadGroupAffinity(): + GetProcessGroupAffinity returns more than one group, + if SetThreadGroupAffinity() was to another group. +-- + GetProcessAffinityMask MS DOCs: + { + If the calling process contains threads in multiple groups, + the function returns zero for both affinity masks. 
+ } + but tests in win10 with 2 groups (less than 64 cores total): + GetProcessAffinityMask() still returns non-zero affinity masks + even after SetThreadGroupAffinity() calls. +*/ +static void PrintProcess_Info() +{ + { + const + Func_GetProcessGroupAffinity fn_GetProcessGroupAffinity = + (Func_GetProcessGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetProcessGroupAffinity"); + if (fn_GetProcessGroupAffinity) + { + unsigned i; + USHORT GroupCounts[64]; + USHORT GroupCount = Z7_ARRAY_SIZE(GroupCounts); + BOOL boolRes = fn_GetProcessGroupAffinity(GetCurrentProcess(), + &GroupCount, GroupCounts); + printf("\n====== GetProcessGroupAffinity : " + "boolRes=%u GroupCounts = %u :", + boolRes, (unsigned)GroupCount); + for (i = 0; i < GroupCount; i++) + printf(" %u", GroupCounts[i]); + printf("\n"); + } + } + { + DWORD_PTR processAffinityMask, systemAffinityMask; + if (GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask)) + { + PRF(printf("\n====== GetProcessAffinityMask : " + ": processAffinityMask=%x, systemAffinityMask=%x\n", + (UInt32)processAffinityMask, (UInt32)systemAffinityMask);) + } + else + printf("\n==GetProcessAffinityMask FAIL"); + } +} +#else +#ifndef USE_THREADS_CreateThread +// #define PRF(x) +#endif +#endif + +/* if we send (stackSize=0) to CreateThread(), it will + use default value PE::SizeOfStackReserve from exe file. + PE::SizeOfStackReserve == 1 MiB in exe file with default linker options. + Windows aligns specified value to the next 64 KB range. 
*/ +static const unsigned k_StackSize_ReserveSize = + #ifdef UNDER_CE + 1 << 17; + #else + 1 << 20; + #endif + WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) { /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ @@ -66,13 +171,52 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) #ifdef USE_THREADS_CreateThread DWORD threadId; - *p = CreateThread(NULL, 0, func, param, 0, &threadId); + *p = CreateThread(NULL, k_StackSize_ReserveSize, func, param, STACK_SIZE_PARAM_IS_A_RESERVATION, &threadId); #else + +#define CALL_beginthreadex(func2, param2, flags, threadIdPtr) \ + ((HANDLE)(_beginthreadex(NULL, k_StackSize_ReserveSize, func2, param2, (flags) | STACK_SIZE_PARAM_IS_A_RESERVATION, threadIdPtr))) unsigned threadId; - *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); - + *p = CALL_beginthreadex(func, param, 0, &threadId); + +#if 0 // 1 : for debug + { + DWORD_PTR prevMask; + DWORD_PTR affinity = 1 << 0; + prevMask = SetThreadAffinityMask(*p, (DWORD_PTR)affinity); + prevMask = prevMask; + } +#endif +#if 0 // 1 : for debug + { + /* win10: new thread will be created in same group that is assigned to parent thread + but affinity mask will contain all allowed threads of that group, + even if affinity mask of parent group is not full + win11: what group it will be created, if we have set + affinity of parent thread with ThreadGroupAffinity? 
+ */ + const + Func_GetThreadGroupAffinity fn = + (Func_GetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetThreadGroupAffinity"); + if (fn) + { + // BOOL wres2; + MY_GROUP_AFFINITY groupAffinity; + memset(&groupAffinity, 0, sizeof(groupAffinity)); + /* wres2 = */ fn(*p, &groupAffinity); + PRF(printf("\n==Thread_Create cur = %6u GetThreadGroupAffinity(): " + "wres2_BOOL = %u, group=%u mask=%x\n", + GetCurrentThreadId(), + wres2, + groupAffinity.Group, + (UInt32)groupAffinity.Mask);) + } + } +#endif + #endif /* maybe we must use errno here, but probably GetLastError() is also OK. */ @@ -93,7 +237,7 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param HANDLE h; WRes wres; unsigned threadId; - h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + h = CALL_beginthreadex(func, param, CREATE_SUSPENDED, &threadId); *p = h; wres = HandleToWRes(h); if (h) @@ -110,7 +254,84 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param */ } { - DWORD prevSuspendCount = ResumeThread(h); + const DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. 
*/ + return wres; + + #endif +} + + +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask) +{ +#ifdef USE_THREADS_CreateThread + + UNUSED_VAR(group) + UNUSED_VAR(affinityMask) + return Thread_Create(p, func, param); + +#else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = CALL_beginthreadex(func, param, CREATE_SUSPENDED, &threadId); + *p = h; + wres = HandleToWRes(h); + if (h) + { + // PrintProcess_Info(); + { + const + Func_SetThreadGroupAffinity fn = + (Func_SetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "SetThreadGroupAffinity"); + if (fn) + { + // WRes wres2; + MY_GROUP_AFFINITY groupAffinity, prev_groupAffinity; + memset(&groupAffinity, 0, sizeof(groupAffinity)); + // groupAffinity.Mask must use only bits that supported by current group + // (groupAffinity.Mask = 0) means all allowed bits + groupAffinity.Mask = affinityMask; + groupAffinity.Group = (WORD)group; + // wres2 = + fn(h, &groupAffinity, &prev_groupAffinity); + /* + if (groupAffinity.Group == prev_groupAffinity.Group) + wres2 = wres2; + else + wres2 = wres2; + if (wres2 == 0) + { + wres2 = GetError(); + PRF(printf("\n==SetThreadGroupAffinity error: %u\n", wres2);) + } + else + { + PRF(printf("\n==Thread_Create_With_Group::SetThreadGroupAffinity()" + " threadId = %6u" + " group=%u mask=%x\n", + threadId, + prev_groupAffinity.Group, + (UInt32)prev_groupAffinity.Mask);) + } + */ + } + } + { + const DWORD prevSuspendCount = ResumeThread(h); /* ResumeThread() returns: 0 : was_not_suspended 1 : was_resumed @@ -297,6 +518,13 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) return Thread_Create_With_CpuSet(p, func, param, NULL); } +/* +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinity) +{ + 
UNUSED_VAR(group) + return Thread_Create_With_Affinity(p, func, param, affinity); +} +*/ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) { @@ -577,5 +805,22 @@ WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) return AutoResetEvent_CreateNotSignaled(p); } +void ThreadNextGroup_Init(CThreadNextGroup *p, unsigned numGroups, unsigned startGroup) /* types match the prototype in Threads.h; numGroups == 0 is treated as 1 */ +{ + // printf("\n====== ThreadNextGroup_Init numGroups = %x: startGroup=%x\n", numGroups, startGroup); + if (numGroups == 0) + numGroups = 1; + p->NumGroups = numGroups; + p->NextGroup = startGroup % numGroups; +} + + +unsigned ThreadNextGroup_GetNext(CThreadNextGroup *p) /* returns the current NextGroup and advances it cyclically modulo NumGroups */ +{ + const unsigned next = p->NextGroup; + p->NextGroup = (next + 1) % p->NumGroups; + return next; +} + #undef PRF #undef Print diff --git a/src/Common/lzma/Threads.h b/src/Common/lzma/Threads.h index c1484a27..be12e6e7 100644 --- a/src/Common/lzma/Threads.h +++ b/src/Common/lzma/Threads.h @@ -1,5 +1,5 @@ /* Threads.h -- multithreading library -2024-03-28 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_THREADS_H #define ZIP7_INC_THREADS_H @@ -140,12 +140,22 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param WRes Thread_Wait_Close(CThread *p); #ifdef _WIN32 +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask); #define Thread_Create_With_CpuSet(p, func, param, cs) \ Thread_Create_With_Affinity(p, func, param, *cs) #else WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); #endif +typedef struct +{ + unsigned NumGroups; + unsigned NextGroup; +} CThreadNextGroup; + +void ThreadNextGroup_Init(CThreadNextGroup *p, unsigned numGroups, unsigned startGroup); +unsigned ThreadNextGroup_GetNext(CThreadNextGroup *p); + #ifdef _WIN32 diff --git a/src/Common/lzma/lzma-history.txt b/src/Common/lzma/lzma-history.txt index
20e0a441..de3f10cc 100644 --- a/src/Common/lzma/lzma-history.txt +++ b/src/Common/lzma/lzma-history.txt @@ -1,6 +1,47 @@ HISTORY of the LZMA SDK ----------------------- +26.01 2026-04-27 +------------------------- +- Linux version of 7-Zip can use huge pages (2 MB pages). It can increase compression + speed by 10% for 7z/xz/LZMA/LZMA2 compression. +- new -spo[d|c|r] switch specifies the path generation mode for the output directory + for archive extraction. The output directory path is generated from the path specified + in the -o{dir_path} switch and the name of the archive being unpacked. + -spod : for Linux/Posix/macOS: -o{dir_path} specifies the direct path to the output directory. + The asterisk (*) character in {dir_path} will not be replaced by the archive name. + -spoc : 7-Zip will concatenate the path specified in -o{dir_path} with the archive name + to form the final path to the output directory. + -spor : 7-Zip will replace the asterisk (*) character in the path specified in the -o{dir_path} + with the archive name. This is the default option. +- The 7zdec.exe program (a lightweight 7z archive decoder) has been modified for security purposes. + Now 7zdec.exe extracts files only to the current folder and its subfolders. +- some bugs were fixed. + + + +26.00 2026-02-12 +------------------------- +- some bugs were fixed. + + +25.01 2025-08-03 +------------------------- +- The code for handling symbolic links has been changed + to provide greater security when extracting files from archives. + Command line switch -snld20 can be used to bypass default security + checks when creating symbolic links. + + +25.00 2025-07-05 +------------------------- +- 7-Zip for Windows can now use more than 64 CPU threads for compression + to zip/7z/xz archives and for the 7-Zip benchmark. + If there is more than one processor group in Windows (on systems with more than + 64 cpu threads), 7-Zip distributes running CPU threads across different processor groups.
+- fixed some bugs and vulnerabilities. + + 24.09 2024-11-29 ------------------------- - The default dictionary size values for LZMA/LZMA2 compression methods were increased: diff --git a/src/Common/lzma/lzma-sdk.txt b/src/Common/lzma/lzma-sdk.txt index f7016709..f26ce93e 100644 --- a/src/Common/lzma/lzma-sdk.txt +++ b/src/Common/lzma/lzma-sdk.txt @@ -1,4 +1,4 @@ -LZMA SDK 24.09 +LZMA SDK 26.01 -------------- LZMA SDK provides the documentation, samples, header files, @@ -59,6 +59,34 @@ LZMA SDK Contents - console programs for lzma / 7z / xz compression and decompression - SFX modules for installers. +How to compile with makefile in Windows +--------------------------------------- + +Some macro names can be defined for compiling with makefile: + +PLATFORM + with possible values: x64, x86, arm64, arm, ia64 + +OLD_COMPILER + for an old VC compiler, like MSVC 6.0. + +MY_DYNAMIC_LINK + for dynamic linking to the run-time library (msvcrt.dll). + The default makefile option is static linking to the run-time library. + +To compile 7zr.exe file for x64 with Visual Studio 2022, +use the following command sequence: + + cd SRC\CPP\7zip\Bundles\Alone7z\ + %comspec% /k "C:\Program Files\VS2022\VC\Auxiliary\Build\vcvars64.bat" + nmake + +You can use other "vcvars*.bat" files from the "VS2022\VC\Auxiliary\Build" directory +to compile for other platforms: + vcvars64.bat + vcvarsamd64_arm64.bat + vcvarsamd64_x86.bat + UNIX/Linux version ------------------