MFV: zstd 1.5.7.

MFC after:	2 weeks
Relnotes:	yes
This commit is contained in:
Xin LI
2026-04-08 14:22:45 -07:00
176 changed files with 19451 additions and 11214 deletions
+4
View File
@@ -15,6 +15,7 @@ SRCS= entropy_common.c \
zstd_compress_literals.c \
zstd_compress_sequences.c \
zstd_compress_superblock.c \
zstd_preSplit.c \
zstdmt_compress.c \
huf_decompress.c \
zstd_ddict.c \
@@ -54,6 +55,9 @@ ZSTDDIR= ${SRCTOP}/sys/contrib/zstd
.include <bsd.compiler.mk>
# These symbols are needed by dll-linked CLI zstd(1).
CFLAGS.pool.c+= -fvisibility=default
CFLAGS.huf_decompress.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL}
# https://github.com/facebook/zstd/commit/812e8f2a [zstd 1.4.1]
+1
View File
@@ -649,6 +649,7 @@ contrib/zstd/lib/compress/zstd_compress.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_compress_literals.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_compress_sequences.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/zstd_compress_superblock.c optional zstdio compile-with "${ZSTD_C} ${NO_WUNUSED_BUT_SET_VARIABLE}"
contrib/zstd/lib/compress/zstd_preSplit.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/fse_compress.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/hist.c optional zstdio compile-with ${ZSTD_C}
contrib/zstd/lib/compress/huf_compress.c optional zstdio compile-with ${ZSTD_C}
+145 -3
View File
@@ -1,3 +1,145 @@
V1.5.7 (Feb 2025)
fix: compression bug in 32-bit mode associated with long-lasting sessions
api: new method `ZSTD_compressSequencesAndLiterals()` (#4217, #4232)
api: `ZSTD_getFrameHeader()` works on skippable frames (#4228)
perf: substantial compression speed improvements (up to +30%) on small data, by @TocarIP (#4144) and @cyan4973 (#4165)
perf: improved compression speed (~+5%) for dictionary compression at low levels (#4170)
perf: much faster speed for `--patch-from` at high compression levels (#4276)
perf: higher `--patch-from` compression ratios, notably at high levels (#4288)
perf: better speed for binaries on Windows (@pps83) and when compiled with Visual Studio (@MessyHack)
perf: slight compression ratio improvement thanks to better block boundaries (#4136, #4176, #4178)
perf: slight compression ratio improvement for `dfast`, aka levels 3 and 4 (#4171)
perf: runtime bmi2 detection enabled on x86 32-bit mode (#4251)
cli: multi-threading as default CLI setting, by @daniellerozenblit
cli: new `--max` command (#4290)
build: improve `msbuild` version autodetection, support VS2022, by @ManuelBlanc
build: fix `meson` build by @artem and @Victor-C-Zhang, and on Windows by @bgilbert
build: compatibility with Apple Framework, by @Treata11
build: improve icc/icx compatibility, by @josepho0918 and @luau-project
build: improve compatibility with Android NDK, by Adenilson Cavalcanti
portability: linux kernel branch, with improved support for Sequence producers (@embg, @gcabiddu, @cyan4973)
portability: improved qnx compatibility, suggested by @rainbowball
portability: improved install script for FreeBSD, by @sunpoet
portability: fixed test suite compatibility with gnu hurd, by @diegonc
doc: clarify specification, by @elasota
misc: improved tests/decodecorpus validation tool (#4102), by antmicro
V1.5.6 (Mar 2024)
api: Promote `ZSTD_c_targetCBlockSize` to Stable API by @felixhandte
api: new `ZSTD_d_maxBlockSize` experimental parameter, to reduce streaming decompression memory, by @terrelln
perf: improve performance of param `ZSTD_c_targetCBlockSize`, by @Cyan4973
perf: improved compression of arrays of integers at high compression, by @Cyan4973
lib: reduce binary size with selective build-time exclusion, by @felixhandte
lib: improved huffman speed on small data and linux kernel, by @terrelln
lib: accept dictionaries with partial literal tables, by @terrelln
lib: fix CCtx size estimation with external sequence producer, by @embg
lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
lib: fix several bugs in magicless-format decoding, by @embg
cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit
cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte
cli: improved time accuracy on BSD, by @felixhandte
cli: better errors on argument parsing, by @KapJI
tests: better compatibility with older versions of `grep`, by @Cyan4973
tests: lorem ipsum generator as default backup content, by @Cyan4973
build: cmake improvements by @terrelln, @sighingnow, @gjasny, @JohanMabille, @Saverio976, @gruenich, @teo-tsirpanis
build: bazel support, by @jondo2010
build: fix cross-compiling for AArch64 with lld by @jcelerier
build: fix Apple platform compatibility, by @nidhijaju
build: fix Visual 2012 and lower compatibility, by @Cyan4973
build: improve win32 support, by @DimitriPapadopoulos
build: better C90 compliance for zlibWrapper, by @emaste
port: make: fat binaries on macos, by @mredig
port: ARM64EC compatibility for Windows, by @dunhor
port: QNX support by @klausholstjacobsen
port: MSYS2 and Cygwin makefile installation and test support, by @QBos07
port: risc-v support validation in CI, by @Cyan4973
port: sparc64 support validation in CI, by @Cyan4973
port: AIX compatibility, by @likema
port: HP-UX compatibility, by @likema
doc: Improved specification accuracy, by @elasota
bug: Fix and deprecate ZSTD_generateSequences (#3981)
v1.5.5 (Apr 2023)
fix: fix rare corruption bug affecting the high compression mode, reported by @danlark1 (#3517, @terrelln)
perf: improve mid-level compression speed (#3529, #3533, #3543, @yoniko and #3552, @terrelln)
lib: deprecated bufferless block-level API (#3534) by @terrelln
cli: mmap large dictionaries to save memory, by @daniellerozenblit
cli: improve speed of --patch-from mode (~+50%) (#3545) by @daniellerozenblit
cli: improve i/o speed (~+10%) when processing lots of small files (#3479) by @felixhandte
cli: zstd no longer crashes when requested to write into write-protected directory (#3541) by @felixhandte
cli: fix decompression into block device using -o, reported by @georgmu (#3583)
build: fix zstd CLI compiled with lzma support but not zlib support (#3494) by @Hello71
build: fix cmake does no longer require 3.18 as minimum version (#3510) by @kou
build: fix MSVC+ClangCL linking issue (#3569) by @tru
build: fix zstd-dll, version of zstd CLI that links to the dynamic library (#3496) by @yoniko
build: fix MSVC warnings (#3495) by @embg
doc: updated zstd specification to clarify corner cases, by @Cyan4973
doc: document how to create fat binaries for macos (#3568) by @rickmark
misc: improve seekable format ingestion speed (~+100%) for very small chunk sizes (#3544) by @Cyan4973
misc: tests/fullbench can benchmark multiple files (#3516) by @dloidolt
v1.5.4 (Feb 2023)
perf: +20% faster huffman decompression for targets that can't compile x64 assembly (#3449, @terrelln)
perf: up to +10% faster streaming compression at levels 1-2 (#3114, @embg)
perf: +4-13% for levels 5-12 by optimizing function generation (#3295, @terrelln)
pref: +3-11% compression speed for `arm` target (#3199, #3164, #3145, #3141, #3138, @JunHe77 and #3139, #3160, @danlark1)
perf: +5-30% faster dictionary compression at levels 1-4 (#3086, #3114, #3152, @embg)
perf: +10-20% cold dict compression speed by prefetching CDict tables (#3177, @embg)
perf: +1% faster compression by removing a branch in ZSTD_fast_noDict (#3129, @felixhandte)
perf: Small compression ratio improvements in high compression mode (#2983, #3391, @Cyan4973 and #3285, #3302, @daniellerozenblit)
perf: small speed improvement by better detecting `STATIC_BMI2` for `clang` (#3080, @TocarIP)
perf: Improved streaming performance when `ZSTD_c_stableInBuffer` is set (#2974, @Cyan4973)
cli: Asynchronous I/O for improved cli speed (#2975, #2985, #3021, #3022, @yoniko)
cli: Change `zstdless` behavior to align with `zless` (#2909, @binhdvo)
cli: Keep original file if `-c` or `--stdout` is given (#3052, @dirkmueller)
cli: Keep original files when result is concatenated into a single output with `-o` (#3450, @Cyan4973)
cli: Preserve Permissions and Ownership of regular files (#3432, @felixhandte)
cli: Print zlib/lz4/lzma library versions with `-vv` (#3030, @terrelln)
cli: Print checksum value for single frame files with `-lv` (#3332, @Cyan4973)
cli: Print `dictID` when present with `-lv` (#3184, @htnhan)
cli: when `stderr` is *not* the console, disable status updates, but preserve final summary (#3458, @Cyan4973)
cli: support `--best` and `--no-name` in `gzip` compatibility mode (#3059, @dirkmueller)
cli: support for `posix` high resolution timer `clock_gettime()`, for improved benchmark accuracy (#3423, @Cyan4973)
cli: improved help/usage (`-h`, `-H`) formatting (#3094, @dirkmueller and #3385, @jonpalmisc)
cli: Fix better handling of bogus numeric values (#3268, @ctkhanhly)
cli: Fix input consists of multiple files _and_ `stdin` (#3222, @yoniko)
cli: Fix tiny files passthrough (#3215, @cgbur)
cli: Fix for `-r` on empty directory (#3027, @brailovich)
cli: Fix empty string as argument for `--output-dir-*` (#3220, @embg)
cli: Fix decompression memory usage reported by `-vv --long` (#3042, @u1f35c, and #3232, @zengyijing)
cli: Fix infinite loop when empty input is passed to trainer (#3081, @terrelln)
cli: Fix `--adapt` doesn't work when `--no-progress` is also set (#3354, @terrelln)
api: Support for Block-Level Sequence Producer (#3333, @embg)
api: Support for in-place decompression (#3432, @terrelln)
api: New `ZSTD_CCtx_setCParams()` function, set all parameters defined in a `ZSTD_compressionParameters` structure (#3403, @Cyan4973)
api: Streaming decompression detects incorrect header ID sooner (#3175, @Cyan4973)
api: Window size resizing optimization for edge case (#3345, @daniellerozenblit)
api: More accurate error codes for busy-loop scenarios (#3413, #3455, @Cyan4973)
api: Fix limit overflow in `compressBound` and `decompressBound` (#3362, #3373, Cyan4973) reported by @nigeltao
api: Deprecate several advanced experimental functions: streaming (#3408, @embg), copy (#3196, @mileshu)
bug: Fix corruption that rarely occurs in 32-bit mode with wlog=25 (#3361, @terrelln)
bug: Fix for block-splitter (#3033, @Cyan4973)
bug: Fixes for Sequence Compression API (#3023, #3040, @Cyan4973)
bug: Fix leaking thread handles on Windows (#3147, @animalize)
bug: Fix timing issues with cmake/meson builds (#3166, #3167, #3170, @Cyan4973)
build: Allow user to select legacy level for cmake (#3050, @shadchin)
build: Enable legacy support by default in cmake (#3079, @niamster)
build: Meson build script improvements (#3039, #3120, #3122, #3327, #3357, @eli-schwartz and #3276, @neheb)
build: Add aarch64 to supported architectures for zstd_trace (#3054, @ooosssososos)
build: support AIX architecture (#3219, @qiongsiwu)
build: Fix `ZSTD_LIB_MINIFY` build macro, which now reduces static library size by half (#3366, @terrelln)
build: Fix Windows issues with Multithreading translation layer (#3364, #3380, @yoniko) and ARM64 target (#3320, @cwoffenden)
build: Fix `cmake` script (#3382, #3392, @terrelln and #3252 @Tachi107 and #3167 @Cyan4973)
doc: Updated man page, providing more details for `--train` mode (#3112, @Cyan4973)
doc: Add decompressor errata document (#3092, @terrelln)
misc: Enable Intel CET (#2992, #2994, @hjl-tools)
misc: Fix `contrib/` seekable format (#3058, @yhoogstrate and #3346, @daniellerozenblit)
misc: Improve speed of the one-file library generator (#3241, @wahern and #3005, @cwoffenden)
v1.5.3 (dev version, unpublished)
v1.5.2 (Jan, 2022)
perf: Regain Minimal memset()-ing During Reuse of Compression Contexts (@Cyan4973, #2969)
build: Build Zstd with `noexecstack` on All Architectures (@felixhandte, #2964)
@@ -19,7 +161,7 @@ build: support for m68k (Motorola 68000's), by @cyan4973
build: improved AIX support, by @Helflym
build: improved meson unofficial build, by @eli-schwartz
cli : custom memory limit when training dictionary (#2925), by @embg
cli : report advanced parameters information when compressing in very verbose mode (``-vv`), by @Svetlitski-FB
cli : report advanced parameters information when compressing in very verbose mode (`-vv`), by @Svetlitski-FB
v1.5.0 (May 11, 2021)
api: Various functions promoted from experimental to stable API: (#2579-2581, @senhuang42)
@@ -86,7 +228,7 @@ api: Add Function to Generate Skippable Frame (#2439, @senhuang42)
perf: New Algorithms for the Long Distance Matcher (#2483, @mpu)
perf: Performance Improvements for Long Distance Matcher (#2464, @mpu)
perf: Don't Shrink Window Log when Streaming with a Dictionary (#2451, @terrelln)
cli: Fix `--output-dir-mirror`'s Rejection of `..`-Containing Paths (#2512, @felixhandte)
cli: Fix `--output-dir-mirror` rejection of `..` -containing paths (#2512, @felixhandte)
cli: Allow Input From Console When `-f`/`--force` is Passed (#2466, @felixhandte)
cli: Improve Help Message (#2500, @senhuang42)
tests: Remove Flaky Tests (#2455, #2486, #2445, @Cyan4973)
@@ -373,7 +515,7 @@ misc: added /contrib/docker script by @gyscos
v1.3.3 (Dec 21, 2017)
perf: faster zstd_opt strategy (levels 16-19)
fix : bug #944 : multithreading with shared ditionary and large data, reported by @gsliepen
fix : bug #944 : multithreading with shared dictionary and large data, reported by @gsliepen
cli : fix : content size written in header by default
cli : fix : improved LZ4 format support, by @felixhandte
cli : new : hidden command `-S`, to benchmark multiple files while generating one result per file
+14 -14
View File
@@ -7,7 +7,7 @@ New versions are being developed in the "dev" branch,
or in their own feature branch.
When they are deemed ready for a release, they are merged into "release".
As a consequences, all contributions must stage first through "dev"
As a consequence, all contributions must stage first through "dev"
or their own feature branch.
## Pull Requests
@@ -60,7 +60,7 @@ Our contribution process works in three main stages:
* Note: run local tests to ensure that your changes didn't break existing functionality
* Quick check
```
make shortest
make check
```
* Longer check
```
@@ -134,11 +134,11 @@ It can be useful to look at additional static analyzers once in a while (and we
- Static analyzers are full of false positive. The signal to noise ratio is actually pretty low.
- A good CI policy is "zero-warning tolerance". That means that all issues must be solved, including false positives. This quickly becomes a tedious workload.
- Multiple static analyzers will feature multiple kind of false positives, sometimes applying to the same code but in different ways leading to :
+ torteous code, trying to please multiple constraints, hurting readability and therefore maintenance. Sometimes, such complexity introduce other more subtle bugs, that are just out of scope of the analyzers.
+ tortuous code, trying to please multiple constraints, hurting readability and therefore maintenance. Sometimes, such complexity introduce other more subtle bugs, that are just out of scope of the analyzers.
+ sometimes, these constraints are mutually exclusive : if one try to solve one, the other static analyzer will complain, they can't be both happy at the same time.
- As if that was not enough, the list of false positives change with each version. It's hard enough to follow one static analyzer, but multiple ones with their own update agenda, this quickly becomes a massive velocity reducer.
This is different from running a static analyzer once in a while, looking at the output, and __cherry picking__ a few warnings that seem helpful, either because they detected a genuine risk of bug, or because it helps expressing the code in a way which is more readable or more difficult to misuse. These kind of reports can be useful, and are accepted.
This is different from running a static analyzer once in a while, looking at the output, and __cherry picking__ a few warnings that seem helpful, either because they detected a genuine risk of bug, or because it helps expressing the code in a way which is more readable or more difficult to misuse. These kinds of reports can be useful, and are accepted.
## Continuous Integration
CI tests run every time a pull request (PR) is created or updated. The exact tests
@@ -171,8 +171,8 @@ who want earlier signal.
| Cirrus CI | Used for testing on FreeBSD | https://github.com/marketplace/cirrus-ci/ | `.cirrus.yml` |
| Circle CI | Historically was used to provide faster signal,<br/> but we may be able to migrate these to Github Actions | https://circleci.com/docs/2.0/getting-started/#setting-up-circleci <br> https://youtu.be/Js3hMUsSZ2c <br> https://circleci.com/docs/2.0/enable-checks/ | `.circleci/config.yml` |
Note: the instructions linked above mostly cover how to set up a repository with CI from scratch.
The general idea should be the same for setting up CI on your fork of zstd, but you may have to
Note: the instructions linked above mostly cover how to set up a repository with CI from scratch.
The general idea should be the same for setting up CI on your fork of zstd, but you may have to
follow slightly different steps. In particular, please ignore any instructions related to setting up
config files (since zstd already has configs for each of these services).
@@ -197,7 +197,7 @@ something subtle merged is extensive benchmarking. You will be doing us a great
take the time to run extensive, long-duration, and potentially cross-(os, platform, process, etc)
benchmarks on your end before submitting a PR. Of course, you will not be able to benchmark
your changes on every single processor and os out there (and neither will we) but do that best
you can:) We've adding some things to think about when benchmarking below in the Benchmarking
you can:) We've added some things to think about when benchmarking below in the Benchmarking
Performance section which might be helpful for you.
3. Optimizing performance for a certain OS, processor vendor, compiler, or network system is a perfectly
legitimate thing to do as long as it does not harm the overall performance health of Zstd.
@@ -216,7 +216,7 @@ will typically not be stable enough to obtain reliable benchmark results. If you
hands on a desktop, this is usually a better scenario.
Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to
do a little more work to ensure that you are in fact measuring the changes you've made not and
do a little more work to ensure that you are in fact measuring the changes you've made and not
noise. Here are some things you can do to make your benchmarks more stable:
1. The most simple thing you can do to drastically improve the stability of your benchmark is
@@ -273,7 +273,7 @@ for that options you have just provided. If you want to look at the internals of
benchmarking script works, you can check out programs/benchzstd.c
For example: say you have made a change that you believe improves the speed of zstd level 1. The
very first thing you should use to asses whether you actually achieved any sort of improvement
very first thing you should use to assess whether you actually achieved any sort of improvement
is `zstd -b`. You might try to do something like this. Note: you can use the `-i` option to
specify a running time for your benchmark in seconds (default is 3 seconds).
Usually, the longer the running time, the more stable your results will be.
@@ -299,7 +299,7 @@ this method of evaluation will not be sufficient.
### Profiling
There are a number of great profilers out there. We're going to briefly mention how you can
profile your code using `instruments` on mac, `perf` on linux and `visual studio profiler`
on windows.
on Windows.
Say you have an idea for a change that you think will provide some good performance gains
for level 1 compression on Zstd. Typically this means, you have identified a section of
@@ -315,8 +315,8 @@ might be).
Most profilers (including the profilers discussed below) will generate a call graph of
functions for you. Your goal will be to find your function of interest in this call graph
and then inspect the time spent inside of it. You might also want to to look at the
annotated assembly which most profilers will provide you with.
and then inspect the time spent inside of it. You might also want to look at the annotated
assembly which most profilers will provide you with.
#### Instruments
We will once again consider the scenario where you think you've identified a piece of code
@@ -330,7 +330,7 @@ Instruments.
* You will want a benchmark that runs for at least a few seconds (5 seconds will
usually be long enough). This way the profiler will have something to work with
and you will have ample time to attach your profiler to this process:)
* I will just use benchzstd as my bencharmking script for this example:
* I will just use benchzstd as my benchmarmking script for this example:
```
$ zstd -b1 -i5 <my-data> # this will run for 5 seconds
```
@@ -455,7 +455,7 @@ This design requirement is fundamental to preserve the portability of the code b
Any variable that can be `const` (aka. read-only) **must** be `const`.
Any pointer which content will not be modified must be `const`.
This property is then controlled at compiler level.
`const` variables are an important signal to readers that this variable isnt modified.
`const` variables are an important signal to readers that this variable isn't modified.
Conversely, non-const variables are a signal to readers to watch out for modifications later on in the function.
* If a function must be inlined, mention it explicitly,
using project's own portable macros, such as `FORCE_INLINE_ATTR`,
+4 -4
View File
@@ -2,7 +2,7 @@ BSD License
For Zstandard software
Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
@@ -14,9 +14,9 @@ are permitted provided that the following conditions are met:
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name Facebook nor the names of its contributors may be used to
endorse or promote products derived from this software without specific
prior written permission.
* Neither the name Facebook, nor Meta, nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+90 -58
View File
@@ -1,5 +1,5 @@
# ################################################################
# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
@@ -85,14 +85,10 @@ test:
$(MAKE) -C $(TESTDIR) $@
ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder $@
## shortest: same as `make check`
.PHONY: shortest
shortest:
$(Q)$(MAKE) -C $(TESTDIR) $@
## check: run basic tests for `zstd` cli
.PHONY: check
check: shortest
check:
$(Q)$(MAKE) -C $(TESTDIR) $@
.PHONY: automated_benchmarking
automated_benchmarking:
@@ -123,6 +119,7 @@ contrib: lib
$(MAKE) -C contrib/seekable_format/examples all
$(MAKE) -C contrib/seekable_format/tests test
$(MAKE) -C contrib/largeNbDicts all
$(MAKE) -C contrib/externalSequenceProducer all
cd build/single_file_libs/ ; ./build_decoder_test.sh
cd build/single_file_libs/ ; ./build_library_test.sh
@@ -142,14 +139,15 @@ clean:
$(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
$(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID)
$(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
$(Q)$(MAKE) -C contrib/externalSequenceProducer $@ > $(VOID)
$(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
$(Q)$(RM) -r lz4
$(Q)$(RM) -r lz4 cmakebuild mesonbuild install
@echo Cleaning completed
#------------------------------------------------------------------------------
# make install is validated only for Linux, macOS, Hurd and some BSD targets
#------------------------------------------------------------------------------
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku AIX))
ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT% CYGWIN_NT% Haiku AIX,$(shell sh -c 'MSYSTEM="MSYS" uname') ))
HOST_OS = POSIX
@@ -157,7 +155,7 @@ MKDIR ?= mkdir -p
HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
EGREP_OPTIONS ?=
ifeq ($HAVE_COLORNEVER, 1)
ifeq ($(HAVE_COLORNEVER), 1)
EGREP_OPTIONS += --color=never
endif
EGREP = egrep $(EGREP_OPTIONS)
@@ -195,18 +193,27 @@ uninstall:
travis-install:
$(MAKE) install PREFIX=~/install_test_dir
.PHONY: clangbuild-darwin-fat
clangbuild-darwin-fat: clean
clang -v
CXX=clang++ CC=clang CFLAGS+="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch arm64" $(MAKE) zstd-release
mv programs/zstd programs/zstd_arm64
CXX=clang++ CC=clang CFLAGS+="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch x86_64" $(MAKE) zstd-release
mv programs/zstd programs/zstd_x64
lipo -create programs/zstd_x64 programs/zstd_arm64 -output programs/zstd
.PHONY: gcc5build gcc6build gcc7build clangbuild m32build armbuild aarch64build ppcbuild ppc64build
gcc5build: clean
gcc-5 -v
CC=gcc-5 $(MAKE) all MOREFLAGS="-Werror"
CC=gcc-5 $(MAKE) all MOREFLAGS="-Werror $(MOREFLAGS)"
gcc6build: clean
gcc-6 -v
CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror"
CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror $(MOREFLAGS)"
gcc7build: clean
gcc-7 -v
CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror"
CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror $(MOREFLAGS)"
clangbuild: clean
clang -v
@@ -230,17 +237,17 @@ ppc64build: clean
.PHONY: armfuzz aarch64fuzz ppcfuzz ppc64fuzz
armfuzz: clean
CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static MOREFLAGS="-static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest
aarch64fuzz: clean
ld -v
CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static MOREFLAGS="-static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest
ppcfuzz: clean
CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static MOREFLAGS="-static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest
ppc64fuzz: clean
CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS="-m64 -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest
.PHONY: cxxtest gcc5test gcc6test armtest aarch64test ppctest ppc64test
cxxtest: CXXFLAGS += -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
@@ -249,49 +256,49 @@ cxxtest: clean
gcc5test: clean
gcc-5 -v
$(MAKE) all CC=gcc-5 MOREFLAGS="-Werror"
$(MAKE) all CC=gcc-5 MOREFLAGS="-Werror $(MOREFLAGS)"
gcc6test: clean
gcc-6 -v
$(MAKE) all CC=gcc-6 MOREFLAGS="-Werror"
$(MAKE) all CC=gcc-6 MOREFLAGS="-Werror $(MOREFLAGS)"
armtest: clean
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static" FUZZER_FLAGS=--no-big-tests
$(MAKE) -C $(TESTDIR) test CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)"
aarch64test:
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static" FUZZER_FLAGS=--no-big-tests
$(MAKE) -C $(TESTDIR) test CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)"
ppctest: clean
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static" FUZZER_FLAGS=--no-big-tests
$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)"
ppc64test: clean
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests
$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)"
.PHONY: arm-ppc-compilation
arm-ppc-compilation:
$(MAKE) -C $(PRGDIR) clean zstd CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)"
$(MAKE) -C $(PRGDIR) clean zstd CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)"
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static $(MOREFLAGS)"
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static $(MOREFLAGS)"
regressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest
uasanregressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined" CXXFLAGS="-O3 -fsanitize=address,undefined"
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined -Werror" CXXFLAGS="-O3 -fsanitize=address,undefined -Werror"
msanregressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory"
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory -Werror" CXXFLAGS="-O3 -fsanitize=memory -Werror"
update_regressionResults : REGRESS_RESULTS_DIR := /tmp/regress_results_dir/
update_regressionResults:
$(MAKE) -C programs zstd
$(MAKE) -C tests/regression test
$(RM) -rf $(REGRESS_RESULTS_DIR)
$(MAKE) -j -C programs zstd
$(MAKE) -j -C tests/regression test
$(RM) -r $(REGRESS_RESULTS_DIR)
$(MKDIR) $(REGRESS_RESULTS_DIR)
./tests/regression/test \
--cache tests/regression/cache \
@@ -306,34 +313,37 @@ update_regressionResults:
# run UBsan with -fsanitize-recover=pointer-overflow
# this only works with recent compilers such as gcc 8+
usan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=undefined -Werror"
$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=undefined -Werror $(MOREFLAGS)"
asan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -Werror"
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -Werror $(MOREFLAGS)"
asan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address -Werror" $(MAKE) -C $(TESTDIR) $*
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $*
msan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason
msan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) HAVE_LZMA=0 $*
msan-%:
$(MAKE) clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -j -C $(TESTDIR) HAVE_LZMA=0 $*
asan32: clean
$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address"
$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address $(MOREFLAGS)"
uasan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror"
$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)"
uasan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror" $(MAKE) -C $(TESTDIR) $*
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $*
tsan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS=--no-big-tests
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)"
.PHONY: apt-install
apt-install:
# TODO: uncomment once issue 3011 is resolved and remove hack from Github Actions .yml
# sudo apt-get update
sudo apt-get -yq --no-install-suggests --no-install-recommends --force-yes install $(APT_PACKAGES)
.PHONY: apt-add-repo
@@ -376,31 +386,53 @@ lz4install:
endif
CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
ifneq (,$(filter MSYS%,$(shell uname)))
ifneq (,$(filter MSYS%,$(shell sh -c 'MSYSTEM="MSYS" uname') ))
HOST_OS = MSYS
CMAKE_PARAMS = -G"MSYS Makefiles" -DCMAKE_BUILD_TYPE=Debug -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
endif
#------------------------------------------------------------------------
# target specific tests
#------------------------------------------------------------------------
ifneq (,$(filter $(HOST_OS),MSYS POSIX))
.PHONY: cmakebuild c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
cmakebuild:
cmake --version
$(RM) -r $(BUILDIR)/cmake/build
$(MKDIR) $(BUILDIR)/cmake/build
cd $(BUILDIR)/cmake/build; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) ..
$(MAKE) -C $(BUILDIR)/cmake/build -j4;
$(MAKE) -C $(BUILDIR)/cmake/build install;
$(MAKE) -C $(BUILDIR)/cmake/build uninstall;
cd $(BUILDIR)/cmake/build; ctest -V -L Medium
ifneq (,$(filter MSYS POSIX,$(HOST_OS)))
CMAKE ?= cmake
CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON
ifneq (,$(filter MSYS%,$(shell sh -c 'MSYSTEM="MSYS" uname')))
CMAKE_PARAMS = -G"MSYS Makefiles" -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
endif
.PHONY: cmakebuild
cmakebuild:
$(CMAKE) --version
$(RM) -r cmakebuild install
$(MKDIR) cmakebuild install
cd cmakebuild; $(CMAKE) -Wdev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Werror -O0" -DCMAKE_INSTALL_PREFIX=install $(CMAKE_PARAMS) ../build/cmake
$(CMAKE) --build cmakebuild --target install -- -j V=1
cd cmakebuild; ctest -V -L Medium
MESON ?= meson
NINJA ?= ninja
.PHONY: mesonbuild
mesonbuild:
$(MESON) setup \
--buildtype=debugoptimized \
-Db_lundef=false \
-Dauto_features=enabled \
-Dbin_programs=true \
-Dbin_tests=true \
-Dbin_contrib=true \
-Ddefault_library=both \
build/meson mesonbuild
$(NINJA) -C mesonbuild/
$(MESON) test -C mesonbuild/ --print-errorlogs
$(MESON) install -C mesonbuild --destdir staging/
.PHONY: c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
c89build: clean
$(CC) -v
CFLAGS="-std=c89 -Werror -O0" $(MAKE) allmost # will fail, due to missing support for `long long`
CFLAGS="-std=c89 -Werror -Wno-attributes -Wpedantic -Wno-long-long -Wno-variadic-macros -O0" $(MAKE) lib zstd
gnu90build: clean
$(CC) -v
+36
View File
@@ -0,0 +1,36 @@
// swift-tools-version:5.0
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription
let package = Package(
name: "zstd",
platforms: [
.macOS(.v10_10), .iOS(.v9), .tvOS(.v9)
],
products: [
// Products define the executables and libraries a package produces, and make them visible to other packages.
.library(
name: "libzstd",
targets: [ "libzstd" ])
],
dependencies: [
// Dependencies declare other packages that this package depends on.
// .package(url: /* package url */, from: "1.0.0"),
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "libzstd",
path: "lib",
sources: [ "common", "compress", "decompress", "dictBuilder" ],
publicHeadersPath: ".",
cSettings: [
.headerSearchPath(".")
])
],
swiftLanguageVersions: [.v5],
cLanguageStandard: .gnu11,
cxxLanguageStandard: .gnucxx14
)
+66 -28
View File
@@ -5,23 +5,20 @@ targeting real-time compression scenarios at zlib-level and better compression r
It's backed by a very fast entropy stage, provided by [Huff0 and FSE library](https://github.com/Cyan4973/FiniteStateEntropy).
Zstandard's format is stable and documented in [RFC8878](https://datatracker.ietf.org/doc/html/rfc8878). Multiple independent implementations are already available.
This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library,
This repository represents the reference implementation, provided as an open-source dual [BSD](LICENSE) OR [GPLv2](COPYING) licensed **C** library,
and a command line utility producing and decoding `.zst`, `.gz`, `.xz` and `.lz4` files.
Should your project require another programming language,
a list of known ports and bindings is provided on [Zstandard homepage](http://www.zstd.net/#other-languages).
a list of known ports and bindings is provided on [Zstandard homepage](https://facebook.github.io/zstd/#other-languages).
**Development branch status:**
[![Build Status][travisDevBadge]][travisLink]
[![Build status][AppveyorDevBadge]][AppveyorLink]
[![Build status][CircleDevBadge]][CircleLink]
[![Build status][CirrusDevBadge]][CirrusLink]
[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
[travisDevBadge]: https://api.travis-ci.com/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.com/facebook/zstd
[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/xt38wbdxjk5mrbem/branch/dev?svg=true "Windows test suite"
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"
[CircleLink]: https://circleci.com/gh/facebook/zstd
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
@@ -32,36 +29,35 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
## Benchmarks
For reference, several fast compression algorithms were tested and compared
on a desktop running Ubuntu 20.04 (`Linux 5.11.0-41-generic`),
with a Core i7-9700K CPU @ 4.9GHz,
on a desktop featuring a Core i7-9700K CPU @ 4.9GHz
and running Ubuntu 20.04 (`Linux ubu20 5.15.0-101-generic`),
using [lzbench], an open-source in-memory benchmark by @inikep
compiled with [gcc] 9.3.0,
compiled with [gcc] 9.4.0,
on the [Silesia compression corpus].
[lzbench]: https://github.com/inikep/lzbench
[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
[Silesia compression corpus]: https://sun.aei.polsl.pl//~sdeor/index.php?page=silesia
[gcc]: https://gcc.gnu.org/
| Compressor name | Ratio | Compression| Decompress.|
| --------------- | ------| -----------| ---------- |
| **zstd 1.5.1 -1** | 2.887 | 530 MB/s | 1700 MB/s |
| **zstd 1.5.6 -1** | 2.887 | 510 MB/s | 1580 MB/s |
| [zlib] 1.2.11 -1 | 2.743 | 95 MB/s | 400 MB/s |
| brotli 1.0.9 -0 | 2.702 | 395 MB/s | 450 MB/s |
| **zstd 1.5.1 --fast=1** | 2.437 | 600 MB/s | 2150 MB/s |
| **zstd 1.5.1 --fast=3** | 2.239 | 670 MB/s | 2250 MB/s |
| quicklz 1.5.0 -1 | 2.238 | 540 MB/s | 760 MB/s |
| **zstd 1.5.1 --fast=4** | 2.148 | 710 MB/s | 2300 MB/s |
| lzo1x 2.10 -1 | 2.106 | 660 MB/s | 845 MB/s |
| [lz4] 1.9.3 | 2.101 | 740 MB/s | 4500 MB/s |
| lzf 3.6 -1 | 2.077 | 410 MB/s | 830 MB/s |
| snappy 1.1.9 | 2.073 | 550 MB/s | 1750 MB/s |
| brotli 1.0.9 -0 | 2.702 | 395 MB/s | 430 MB/s |
| **zstd 1.5.6 --fast=1** | 2.437 | 545 MB/s | 1890 MB/s |
| **zstd 1.5.6 --fast=3** | 2.239 | 650 MB/s | 2000 MB/s |
| quicklz 1.5.0 -1 | 2.238 | 525 MB/s | 750 MB/s |
| lzo1x 2.10 -1 | 2.106 | 650 MB/s | 825 MB/s |
| [lz4] 1.9.4 | 2.101 | 700 MB/s | 4000 MB/s |
| lzf 3.6 -1 | 2.077 | 420 MB/s | 830 MB/s |
| snappy 1.1.9 | 2.073 | 530 MB/s | 1660 MB/s |
[zlib]: http://www.zlib.net/
[lz4]: http://www.lz4.org/
[zlib]: https://www.zlib.net/
[lz4]: https://lz4.github.io/lz4/
The negative compression levels, specified with `--fast=#`,
offer faster compression and decompression speed
at the cost of compression ratio (compared to level 1).
at the cost of compression ratio.
Zstd can also offer stronger compression ratios at the cost of compression speed.
Speed vs Compression trade-off is configurable by small increments.
@@ -124,14 +120,27 @@ Dictionary gains are mostly effective in the first few KB. Then, the compression
## Build instructions
`make` is the officially maintained build system of this project.
All other build systems are "compatible" and 3rd-party maintained,
they may feature small differences in advanced options.
When your system allows it, prefer using `make` to build `zstd` and `libzstd`.
### Makefile
If your system is compatible with standard `make` (or `gmake`),
invoking `make` in root directory will generate `zstd` cli in root directory.
It will also create `libzstd` into `lib/`.
Other available options include:
- `make install` : create and install zstd cli, library and man pages
- `make check` : create and run `zstd`, tests its behavior on local platform
- `make check` : create and run `zstd`, test its behavior on local platform
The `Makefile` follows the [GNU Standard Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html),
allowing staged install, standard flags, directory variables and command variables.
For advanced use cases, specialized compilation flags which control binary generation
are documented in [`lib/README.md`](lib/README.md#modular-build) for the `libzstd` library
and in [`programs/README.md`](programs/README.md#compilation-variables) for the `zstd` CLI.
### cmake
@@ -141,6 +150,18 @@ to create `zstd` binary, and `libzstd` dynamic and static libraries.
By default, `CMAKE_BUILD_TYPE` is set to `Release`.
#### Support for Fat (Universal2) Output
`zstd` can be built and installed with support for both Apple Silicon (M1/M2) as well as Intel by using CMake's Universal2 support.
To perform a Fat/Universal2 build and install use the following commands:
```bash
cmake -B build-cmake-debug -S build/cmake -G Ninja -DCMAKE_OSX_ARCHITECTURES="x86_64;x86_64h;arm64"
cd build-cmake-debug
ninja
sudo ninja install
```
### Meson
A Meson project is provided within [`build/meson`](build/meson). Follow
@@ -163,6 +184,17 @@ You can build and install zstd [vcpkg](https://github.com/Microsoft/vcpkg/) depe
The zstd port in vcpkg is kept up to date by Microsoft team members and community contributors.
If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
### Conan
You can install pre-built binaries for zstd or build it from source using [Conan](https://conan.io/). Use the following command:
```bash
conan install --requires="zstd/[*]" --build=missing
```
The zstd Conan recipe is kept up to date by Conan maintainers and community contributors.
If the version is out of date, please [create an issue or pull request](https://github.com/conan-io/conan-center-index) on the ConanCenterIndex repository.
### Visual Studio (Windows)
Going into `build` directory, you will find additional possibilities:
@@ -176,20 +208,26 @@ Going into `build` directory, you will find additional possibilities:
You can build the zstd binary via buck by executing: `buck build programs:zstd` from the root of the repo.
The output binary will be in `buck-out/gen/programs/`.
### Bazel
You easily can integrate zstd into your Bazel project by using the module hosted on the [Bazel Central Repository](https://registry.bazel.build/modules/zstd).
## Testing
You can run quick local smoke tests by executing the `playTest.sh` script from the `src/tests` directory.
Two env variables `$ZSTD_BIN` and `$DATAGEN_BIN` are needed for the test script to locate the zstd and datagen binary.
For information on CI testing, please refer to TESTING.md
You can run quick local smoke tests by running `make check`.
If you can't use `make`, execute the `playTest.sh` script from the `src/tests` directory.
Two env variables `$ZSTD_BIN` and `$DATAGEN_BIN` are needed for the test script to locate the `zstd` and `datagen` binary.
For information on CI testing, please refer to `TESTING.md`.
## Status
Zstandard is currently deployed within Facebook. It is used continuously to compress large amounts of data in multiple formats and use cases.
Zstandard is currently deployed within Facebook and many other large cloud infrastructures.
It is run continuously to compress large amounts of data in multiple formats and use cases.
Zstandard is considered safe for production environments.
## License
Zstandard is dual-licensed under [BSD](LICENSE) and [GPLv2](COPYING).
Zstandard is dual-licensed under [BSD](LICENSE) OR [GPLv2](COPYING).
## Contributing
+15
View File
@@ -0,0 +1,15 @@
# Reporting and Fixing Security Issues
Please do not open GitHub issues or pull requests - this makes the problem immediately visible to everyone, including malicious actors. Security issues in this open source project can be safely reported via the Meta Bug Bounty program:
https://www.facebook.com/whitehat
Meta's security team will triage your report and determine whether or not is it eligible for a bounty under our program.
# Receiving Vulnerability Notifications
In the case that a significant security vulnerability is reported to us or discovered by us---without being publicly known---we will, at our discretion, notify high-profile, high-exposure users of Zstandard ahead of our public disclosure of the issue and associated fix.
If you believe your project would benefit from inclusion in this list, please reach out to one of the maintainers.
<!-- Note to maintainers: this list is kept [here](https://fburl.com/wiki/cgc1l62x). -->
+1 -1
View File
@@ -22,7 +22,7 @@ They consist of the following tests:
- `tests/playTests.sh --test-large-data`
- Fuzzer tests: `tests/fuzzer.c`, `tests/zstreamtest.c`, and `tests/decodecorpus.c`
- `tests/zstreamtest.c` under Tsan (streaming mode, including multithreaded mode)
- Valgrind Test (`make -C tests valgrindTest`) (testing CLI and fuzzer under valgrind)
- Valgrind Test (`make -C tests test-valgrind`) (testing CLI and fuzzer under `valgrind`)
- Fuzzer tests (see above) on ARM, AArch64, PowerPC, and PowerPC64
Long Tests
-205
View File
@@ -1,205 +0,0 @@
# Following tests are run _only_ on `release` branch
# and on selected feature branch named `appveyorTest` or `visual*`
-
version: 1.0.{build}
branches:
only:
- release
- master
- /appveyor*/
- /visual*/
environment:
matrix:
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x64"
SCRIPT: "make allzstd MOREFLAGS=-static"
ARTIFACT: "true"
BUILD: "true"
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x86"
SCRIPT: "make allzstd MOREFLAGS=-static"
ARTIFACT: "true"
BUILD: "true"
- COMPILER: "clang-cl"
HOST: "cmake-visual"
PLATFORM: "x64"
CONFIGURATION: "Release"
CMAKE_GENERATOR: "Visual Studio 15 2017"
CMAKE_GENERATOR_PLATFORM: "x64"
CMAKE_GENERATOR_TOOLSET: "LLVM"
APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
install:
- ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET PATH_ORIGINAL=%PATH%
- if [%HOST%]==[mingw] (
SET "PATH_MINGW32=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin" &&
SET "PATH_MINGW64=C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin" &&
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin\make.exe &&
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin\make.exe
)
- IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] (
SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;"
)
build_script:
- if [%HOST%]==[mingw] (
( if [%PLATFORM%]==[x64] (
SET "PATH=%PATH_MINGW64%;%PATH_ORIGINAL%"
) else if [%PLATFORM%]==[x86] (
SET "PATH=%PATH_MINGW32%;%PATH_ORIGINAL%"
) )
)
- if [%HOST%]==[mingw] if [%BUILD%]==[true] (
make -v &&
sh -c "%COMPILER% -v" &&
ECHO Building zlib to static link &&
SET "CC=%COMPILER%" &&
sh -c "cd .. && git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib" &&
sh -c "cd ../zlib && make -f win32/Makefile.gcc libz.a"
ECHO Building zstd &&
SET "CPPFLAGS=-I../../zlib" &&
SET "LDFLAGS=../../zlib/libz.a" &&
sh -c "%SCRIPT%" &&
( if [%COMPILER%]==[gcc] if [%ARTIFACT%]==[true]
ECHO Creating artifacts &&
ECHO %cd% &&
lib\dll\example\build_package.bat &&
make -C programs DEBUGFLAGS= clean zstd &&
cd programs\ && 7z a -tzip -mx9 zstd-win-binary-%PLATFORM%.zip zstd.exe &&
appveyor PushArtifact zstd-win-binary-%PLATFORM%.zip &&
cp zstd.exe ..\bin\zstd.exe &&
git clone --depth 1 --branch release https://github.com/facebook/zstd &&
cd zstd &&
git archive --format=tar release -o zstd-src.tar &&
..\zstd -19 zstd-src.tar &&
appveyor PushArtifact zstd-src.tar.zst &&
certUtil -hashfile zstd-src.tar.zst SHA256 > zstd-src.tar.zst.sha256.sig &&
appveyor PushArtifact zstd-src.tar.zst.sha256.sig &&
cd ..\..\bin\ &&
7z a -tzip -mx9 zstd-win-release-%PLATFORM%.zip * &&
appveyor PushArtifact zstd-win-release-%PLATFORM%.zip
)
)
- if [%HOST%]==[cmake-visual] (
ECHO *** &&
ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
PUSHD build\cmake &&
cmake -DBUILD_TESTING=ON . &&
cmake --build . --config %CONFIGURATION% -j4 &&
POPD &&
ECHO ***
)
test_script:
- ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET "CC=gcc"
- SET "CXX=g++"
- if [%TEST%]==[cmake] (
mkdir build\cmake\build &&
cd build\cmake\build &&
SET FUZZERTEST=-T2mn &&
SET ZSTREAM_TESTTIME=-T2mn &&
cmake -G "Visual Studio 14 2015 Win64" .. &&
cd ..\..\.. &&
make clean
)
# The following tests are for regular pushes
# into `dev` or some feature branch
# There run less tests, for shorter feedback loop
-
version: 1.0.{build}
environment:
matrix:
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "x64"
CONFIGURATION: "Debug"
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "Win32"
CONFIGURATION: "Debug"
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "x64"
CONFIGURATION: "Release"
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "Win32"
CONFIGURATION: "Release"
- COMPILER: "gcc"
HOST: "cygwin"
PLATFORM: "x64"
- COMPILER: "clang-cl"
HOST: "cmake-visual"
PLATFORM: "x64"
CONFIGURATION: "Release"
CMAKE_GENERATOR: "Visual Studio 15 2017"
CMAKE_GENERATOR_PLATFORM: "x64"
CMAKE_GENERATOR_TOOLSET: "LLVM"
APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
install:
- ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET PATH_ORIGINAL=%PATH%
- if [%HOST%]==[cygwin] (
ECHO Installing Cygwin Packages &&
C:\cygwin64\setup-x86_64.exe -qnNdO -R "C:\cygwin64" -g -P ^
gcc,^
cmake,^
make
)
- IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] (
SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;"
)
build_script:
- ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION%
- if [%HOST%]==[cygwin] (
set CHERE_INVOKING=yes &&
set CC=%COMPILER% &&
C:\cygwin64\bin\bash --login -c "
set -e;
cd build/cmake;
CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T20s -DZSTD_ZSTREAM_FLAGS=-T20s -DZSTD_FULLBENCH_FLAGS=-i0 .;
make VERBOSE=1 -j;
ctest -V -L Medium;
"
)
- if [%HOST%]==[cmake-visual] (
ECHO *** &&
ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
PUSHD build\cmake &&
cmake -DBUILD_TESTING=ON . &&
cmake --build . --config %CONFIGURATION% -j4 &&
POPD &&
ECHO ***
)
- if [%HOST%]==[visual] (
ECHO *** &&
ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe
)
test_script:
- ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET "FUZZERTEST=-T10s"
- if [%HOST%]==[mingw] (
set "CC=%COMPILER%" &&
make clean &&
make check
)
+3 -2
View File
@@ -5,8 +5,9 @@ This directory contains material defining the Zstandard format,
as well as detailed instructions to use `zstd` library.
__`zstd_manual.html`__ : Documentation of `zstd.h` API, in html format.
Click on this link: [http://zstd.net/zstd_manual.html](http://zstd.net/zstd_manual.html)
to display documentation of latest release in readable format within a browser.
Unfortunately, Github doesn't display `html` files in parsed format, just as source code.
For a readable display of html API documentation of latest release,
use this link: [https://raw.githack.com/facebook/zstd/release/doc/zstd_manual.html](https://raw.githack.com/facebook/zstd/release/doc/zstd_manual.html) .
__`zstd_compression_format.md`__ : This document defines the Zstandard compression format.
Compliant decoders must adhere to this document,
+148
View File
@@ -0,0 +1,148 @@
Decompressor Errata
===================
This document captures known decompressor bugs, where the decompressor rejects a valid zstd frame.
Each entry will contain:
1. The last affected decompressor versions.
2. The decompressor components affected.
2. Whether the compressed frame could ever be produced by the reference compressor.
3. An example frame (hexadecimal string when it can be short enough, link to golden file otherwise)
4. A description of the bug.
The document is in reverse chronological order, with the bugs that affect the most recent zstd decompressor versions listed first.
No sequence using the 2-bytes format
------------------------------------------------
**Last affected version**: v1.5.5
**Affected decompressor component(s)**: Library & CLI
**Produced by the reference compressor**: No
**Example Frame**: see zstd/tests/golden-decompression/zeroSeq_2B.zst
The zstd decoder incorrectly expects FSE tables when there are 0 sequences present in the block
if the value 0 is encoded using the 2-bytes format.
Instead, it should immediately end the sequence section, and move on to next block.
This situation was never generated by the reference compressor,
because representing 0 sequences with the 2-bytes format is inefficient
(the 1-byte format is always used in this case).
Compressed block with a size of exactly 128 KB
------------------------------------------------
**Last affected version**: v1.5.2
**Affected decompressor component(s)**: Library & CLI
**Produced by the reference compressor**: No
**Example Frame**: see zstd/tests/golden-decompression/block-128k.zst
The zstd decoder incorrectly rejected blocks of type `Compressed_Block` when their size was exactly 128 KB.
Note that `128 KB - 1` was accepted, and `128 KB + 1` is forbidden by the spec.
This type of block was never generated by the reference compressor.
These blocks used to be disallowed by the spec up until spec version 0.3.2 when the restriction was lifted by [PR#1689](https://github.com/facebook/zstd/pull/1689).
> A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block).
Compressed block with 0 literals and 0 sequences
------------------------------------------------
**Last affected version**: v1.5.2
**Affected decompressor component(s)**: Library & CLI
**Produced by the reference compressor**: No
**Example Frame**: `28b5 2ffd 2000 1500 0000 00`
The zstd decoder incorrectly rejected blocks of type `Compressed_Block` that encodes literals as `Raw_Literals_Block` with no literals, and has no sequences.
This type of block was never generated by the reference compressor.
Additionally, these blocks were disallowed by the spec up until spec version 0.3.2 when the restriction was lifted by [PR#1689](https://github.com/facebook/zstd/pull/1689).
> A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block).
First block is RLE block
------------------------
**Last affected version**: v1.4.3
**Affected decompressor component(s)**: CLI only
**Produced by the reference compressor**: No
**Example Frame**: `28b5 2ffd a001 0002 0002 0010 000b 0000 00`
The zstd CLI decompressor rejected cases where the first block was an RLE block whose `Block_Size` is 131072, and the frame contains more than one block.
This only affected the zstd CLI, and not the library.
The example is an RLE block with 131072 bytes, followed by a second RLE block with 1 byte.
The compressor currently works around this limitation by explicitly avoiding producing RLE blocks as the first
block.
https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L3527-L3535
Tiny FSE Table & Block
----------------------
**Last affected version**: v1.3.4
**Affected decompressor component(s)**: Library & CLI
**Produced by the reference compressor**: Possibly until version v1.3.4, but probably never
**Example Frame**: `28b5 2ffd 2027 c500 0080 f3f1 f0ec ebc6 c5c7 f09d 4300 0000 e0e0 0658 0100 603e 52`
The zstd library rejected blocks of type `Compressed_Block` whose offset of the last table with type `FSE_Compressed_Mode` was less than 4 bytes from the end of the block.
In more depth, let `Last_Table_Offset` be the offset in the compressed block (excluding the header) that
the last table with type `FSE_Compressed_Mode` started. If `Block_Content - Last_Table_Offset < 4` then
the buggy zstd decompressor would reject the block. This occurs when the last serialized table is 2 bytes
and the bitstream size is 1 byte.
For example:
* There is 1 sequence in the block
* `Literals_Lengths_Mode` is `FSE_Compressed_Mode` & the serialized table size is 2 bytes
* `Offsets_Mode` is `Predefined_Mode`
* `Match_Lengths_Mode` is `Predefined_Mode`
* The bitstream is 1 byte. E.g. there is only one sequence and it fits in 1 byte.
The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`.
See the compressor workaround code:
https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682
Magicless format
----------------------
**Last affected version**: v1.5.5
**Affected decompressor component(s)**: Library
**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232)
**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59`
v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames.
These include but are not limited to:
* Valid frames that happen to begin with a legacy magic number (little-endian)
* Valid frames that happen to begin with a skippable magic number (little-endian)
If you are affected by this issue and cannot update to v1.5.6 or later, there is a
workaround to recover affected data. Simply prepend the ZSTD magic number
`0xFD2FB528` (little-endian) to your data and decompress using the standard-format
decoder.
@@ -0,0 +1,80 @@
Decompressor Permissiveness to Invalid Data
===========================================
This document describes the behavior of the reference decompressor in cases
where it accepts formally invalid data instead of reporting an error.
While the reference decompressor *must* decode any compliant frame following
the specification, its ability to detect erroneous data is on a best effort
basis: the decoder may accept input data that would be formally invalid,
when it causes no risk to the decoder, and which detection would cost too much
complexity or speed regression.
In practice, the vast majority of invalid data are detected, if only because
many corruption events are dangerous for the decoder process (such as
requesting an out-of-bound memory access) and many more are easy to check.
This document lists a few known cases where invalid data was formerly accepted
by the decoder, and what has changed since.
Truncated Huffman states
------------------------
**Last affected version**: v1.5.6
**Produced by the reference compressor**: No
**Example Frame**: `28b5 2ffd 0000 5500 0072 8001 0420 7e1f 02aa 00`
When using FSE-compressed Huffman weights, the compressed weight bitstream
could contain fewer bits than necessary to decode the initial states.
The reference decompressor up to v1.5.6 will decode truncated or missing
initial states as zero, which can result in a valid Huffman tree if only
the second state is truncated.
In newer versions, truncated initial states are reported as a corruption
error by the decoder.
Offset == 0
-----------
**Last affected version**: v1.5.5
**Produced by the reference compressor**: No
**Example Frame**: `28b5 2ffd 0000 4500 0008 0002 002f 430b ae`
If a sequence is decoded with `literals_length = 0` and `offset_value = 3`
while `Repeated_Offset_1 = 1`, the computed offset will be `0`, which is
invalid.
The reference decompressor up to v1.5.5 processes this case as if the computed
offset was `1`, including inserting `1` into the repeated offset list.
This prevents the output buffer from remaining uninitialized, thus denying a
potential attack vector from an untrusted source.
However, in the rare case where this scenario would be the outcome of a
transmission or storage error, the decoder relies on the checksum to detect
the error.
In newer versions, this case is always detected and reported as a corruption error.
Non-zeroes reserved bits
------------------------
**Last affected version**: v1.5.5
**Produced by the reference compressor**: No
The Sequences section of each block has a header, and one of its elements is a
byte, which describes the compression mode of each symbol.
This byte contains 2 reserved bits which must be set to zero.
The reference decompressor up to v1.5.5 just ignores these 2 bits.
This behavior has no consequence for the rest of the frame decoding process.
In newer versions, the 2 reserved bits are actively checked for value zero,
and the decoder reports a corruption error if they are not.
@@ -1,5 +1,5 @@
# ################################################################
# Copyright (c) Yann Collet, Facebook, Inc.
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
@@ -10,7 +10,7 @@
ZSTD ?= zstd # note: requires zstd installation on local system
UNAME?= $(shell uname)
UNAME?= $(shell sh -c 'MSYSTEM="MSYS" uname')
ifeq ($(UNAME), SunOS)
DIFF ?= gdiff
else
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -997,7 +997,8 @@ static void decompress_sequences(frame_context_t *const ctx,
const size_t num_sequences);
static sequence_command_t decode_sequence(sequence_states_t *const state,
const u8 *const src,
i64 *const offset);
i64 *const offset,
int lastSequence);
static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
const seq_part_t type, const seq_mode_t mode);
@@ -1017,12 +1018,7 @@ static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,
// This is a variable size field using between 1 and 3 bytes. Let's call its
// first byte byte0."
u8 header = IO_read_bits(in, 8);
if (header == 0) {
// "There are no sequences. The sequence section stops there.
// Regenerated content is defined entirely by literals section."
*sequences = NULL;
return 0;
} else if (header < 128) {
if (header < 128) {
// "Number_of_Sequences = byte0 . Uses 1 byte."
num_sequences = header;
} else if (header < 255) {
@@ -1033,6 +1029,12 @@ static size_t decode_sequences(frame_context_t *const ctx, istream_t *in,
num_sequences = IO_read_bits(in, 16) + 0x7F00;
}
if (num_sequences == 0) {
// "There are no sequences. The sequence section stops there."
*sequences = NULL;
return 0;
}
*sequences = malloc(num_sequences * sizeof(sequence_command_t));
if (!*sequences) {
BAD_ALLOC();
@@ -1114,7 +1116,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
for (size_t i = 0; i < num_sequences; i++) {
// Decode sequences one by one
sequences[i] = decode_sequence(&states, src, &bit_offset);
sequences[i] = decode_sequence(&states, src, &bit_offset, i==num_sequences-1);
}
if (bit_offset != 0) {
@@ -1125,7 +1127,8 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
// Decode a single sequence and update the state
static sequence_command_t decode_sequence(sequence_states_t *const states,
const u8 *const src,
i64 *const offset) {
i64 *const offset,
int lastSequence) {
// "Each symbol is a code in its own context, which specifies Baseline and
// Number_of_Bits to add. Codes are FSE compressed, and interleaved with raw
// additional bits in the same bitstream."
@@ -1160,7 +1163,7 @@ static sequence_command_t decode_sequence(sequence_states_t *const states,
// Literals_Length_State is updated, followed by Match_Length_State, and
// then Offset_State."
// If the stream is complete don't read bits to update state
if (*offset != 0) {
if (!lastSequence) {
FSE_update_state(&states->ll_table, &states->ll_state, src, offset);
FSE_update_state(&states->ml_table, &states->ml_state, src, offset);
FSE_update_state(&states->of_table, &states->of_state, src, offset);
@@ -1210,7 +1213,7 @@ static void decode_seq_table(FSE_dtable *const table, istream_t *const in,
break;
}
case seq_repeat:
// "Repeat_Mode : re-use distribution table from previous compressed
// "Repeat_Mode : reuse distribution table from previous compressed
// block."
// Nothing to do here, table will be unchanged
if (!table->symbols) {
@@ -1399,7 +1402,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
/******* END OUTPUT SIZE COUNTING *********************************************/
/******* DICTIONARY PARSING ***************************************************/
dictionary_t* create_dictionary() {
dictionary_t* create_dictionary(void) {
dictionary_t* const dict = calloc(1, sizeof(dictionary_t));
if (!dict) {
BAD_ALLOC();
@@ -1890,7 +1893,7 @@ static size_t HUF_decompress_4stream(const HUF_dtable *const dtable,
/// Initializes a Huffman table using canonical Huffman codes
/// For more explanation on canonical Huffman codes see
/// http://www.cs.uofs.edu/~mccloske/courses/cmps340/huff_canonical_dec2015.html
/// https://www.cs.scranton.edu/~mccloske/courses/cmps340/huff_canonical_dec2015.html
/// Codes within a level are allocated in symbol order (i.e. smaller symbols get
/// earlier codes)
static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits,
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 13 KiB

+217 -138
View File
@@ -3,7 +3,7 @@ Zstandard Compression Format
### Notices
Copyright (c) 2016-2021 Yann Collet, Facebook, Inc.
Copyright (c) Meta Platforms, Inc. and affiliates.
Permission is granted to copy and distribute this document
for any purpose and without charge,
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
### Version
0.3.7 (2020-12-09)
0.4.3 (2024-10-07)
Introduction
@@ -26,7 +26,7 @@ The purpose of this document is to define a lossless compressed data format,
that is independent of CPU type, operating system,
file system and character set, suitable for
file compression, pipe and streaming compression,
using the [Zstandard algorithm](http://www.zstandard.org).
using the [Zstandard algorithm](https://facebook.github.io/zstd/).
The text of the specification assumes a basic background in programming
at the level of bits and other primitive data representations.
@@ -35,7 +35,7 @@ even for an arbitrarily long sequentially presented input data stream,
using only an a priori bounded amount of intermediate storage,
and hence can be used in data communications.
The format uses the Zstandard compression method,
and optional [xxHash-64 checksum method](http://www.xxhash.org),
and optional [xxHash-64 checksum method](https://cyan4973.github.io/xxHash/),
for detection of data corruption.
The data format defined by this specification
@@ -134,7 +134,7 @@ __`Content_Checksum`__
An optional 32-bit checksum, only present if `Content_Checksum_flag` is set.
The content checksum is the result
of [xxh64() hash function](http://www.xxhash.org)
of [xxh64() hash function](https://cyan4973.github.io/xxHash/)
digesting the original (decoded) data as input, and a seed of zero.
The low 4 bytes of the checksum are stored in __little-endian__ format.
@@ -435,7 +435,7 @@ They can be decoded first, and then copied during [Sequence Execution],
or they can be decoded on the flow during [Sequence Execution].
Literals can be stored uncompressed or compressed using Huffman prefix codes.
When compressed, an optional tree description can be present,
When compressed, a tree description may optionally be present,
followed by 1 or 4 streams.
| `Literals_Section_Header` | [`Huffman_Tree_Description`] | [jumpTable] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
@@ -470,6 +470,7 @@ This field uses 2 lowest bits of first byte, describing 4 different block types
repeated `Regenerated_Size` times.
- `Compressed_Literals_Block` - This is a standard Huffman-compressed block,
starting with a Huffman tree description.
In this mode, there are at least 2 different literals represented in the Huffman tree description.
See details below.
- `Treeless_Literals_Block` - This is a Huffman-compressed block,
using Huffman tree _from previous Huffman-compressed literals block_.
@@ -510,7 +511,7 @@ Its value is : `Size_Format = (Literals_Section_Header[0]>>2) & 3`
`Regenerated_Size = (Literals_Section_Header[0]>>4) + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12)`
Only Stream1 is present for these cases.
Note : it's allowed to represent a short value (for example `13`)
Note : it's allowed to represent a short value (for example `27`)
using a long format, even if it's less efficient.
__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ :
@@ -521,18 +522,37 @@ __`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
`Literals_Section_Header` uses 3 bytes.
- `Size_Format` == 01 : 4 streams.
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (6-1023).
`Literals_Section_Header` uses 3 bytes.
- `Size_Format` == 10 : 4 streams.
Both `Regenerated_Size` and `Compressed_Size` use 14 bits (0-16383).
Both `Regenerated_Size` and `Compressed_Size` use 14 bits (6-16383).
`Literals_Section_Header` uses 4 bytes.
- `Size_Format` == 11 : 4 streams.
Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143).
Both `Regenerated_Size` and `Compressed_Size` use 18 bits (6-262143).
`Literals_Section_Header` uses 5 bytes.
Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention.
Note: `Compressed_Size` __includes__ the size of the Huffman Tree description
_when_ it is present.
Note 2: `Compressed_Size` can never be `==0`.
Even in single-stream scenario, assuming an empty content, it must be `>=1`,
since it contains at least the final end bit flag.
In 4-streams scenario, a valid `Compressed_Size` is necessarily `>= 10`
(6 bytes for the jump table, + 4x1 bytes for the 4 streams).
4 streams is faster than 1 stream in decompression speed,
by exploiting instruction level parallelism.
But it's also more expensive,
costing on average ~7.3 bytes more than the 1 stream mode, mostly from the jump table.
In general, use the 4 streams mode when there are more literals to decode,
to favor higher decompression speeds.
Note that beyond >1KB of literals, the 4 streams mode is compulsory.
Note that a minimum of 6 bytes is required for the 4 streams mode.
That's a technical minimum, but it's not recommended to employ the 4 streams mode
for such a small quantity, that would be wasteful.
A more practical lower bound would be around ~256 bytes.
#### Raw Literals Block
The data in Stream1 is `Regenerated_Size` bytes long,
@@ -552,6 +572,7 @@ or from a dictionary.
### `Huffman_Tree_Description`
This section is only present when `Literals_Block_Type` type is `Compressed_Literals_Block` (`2`).
The tree describes the weights of all literals symbols that can be present in the literals block, at least 2 and up to 256.
The format of the Huffman tree description can be found at [Huffman Tree description](#huffman-tree-description).
The size of `Huffman_Tree_Description` is determined during decoding process,
it must be used to determine where streams begin.
@@ -561,10 +582,10 @@ it must be used to determine where streams begin.
### Jump Table
The Jump Table is only present when there are 4 Huffman-coded streams.
Reminder : Huffman compressed data consists of either 1 or 4 Huffman-coded streams.
Reminder : Huffman compressed data consists of either 1 or 4 streams.
If only one stream is present, it is a single bitstream occupying the entire
remaining portion of the literals block, encoded as described within
remaining portion of the literals block, encoded as described in
[Huffman-Coded Streams](#huffman-coded-streams).
If there are four streams, `Literals_Section_Header` only provided
@@ -575,17 +596,18 @@ except for the last stream which may be up to 3 bytes smaller,
to reach a total decompressed size as specified in `Regenerated_Size`.
The compressed size of each stream is provided explicitly in the Jump Table.
Jump Table is 6 bytes long, and consist of three 2-byte __little-endian__ fields,
Jump Table is 6 bytes long, and consists of three 2-byte __little-endian__ fields,
describing the compressed sizes of the first three streams.
`Stream4_Size` is computed from total `Total_Streams_Size` minus sizes of other streams.
`Stream4_Size` is computed from `Total_Streams_Size` minus sizes of other streams:
`Stream4_Size = Total_Streams_Size - 6 - Stream1_Size - Stream2_Size - Stream3_Size`.
Note: if `Stream1_Size + Stream2_Size + Stream3_Size > Total_Streams_Size`,
`Stream4_Size` is necessarily `>= 1`. Therefore,
if `Total_Streams_Size < Stream1_Size + Stream2_Size + Stream3_Size + 6 + 1`,
data is considered corrupted.
Each of these 4 bitstreams is then decoded independently as a Huffman-Coded stream,
as described at [Huffman-Coded Streams](#huffman-coded-streams)
as described in [Huffman-Coded Streams](#huffman-coded-streams)
Sequences Section
@@ -628,13 +650,15 @@ __`Number_of_Sequences`__
This is a variable size field using between 1 and 3 bytes.
Let's call its first byte `byte0`.
- `if (byte0 == 0)` : there are no sequences.
The sequence section stops there.
Decompressed content is defined entirely as Literals Section content.
The FSE tables used in `Repeat_Mode` aren't updated.
- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0 - 0x80) << 8) + byte1`. Uses 2 bytes.
Note that the 2 bytes format fully overlaps the 1 byte format.
- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00`. Uses 3 bytes.
`if (Number_of_Sequences == 0)` : there are no sequences.
The sequence section stops immediately,
FSE tables used in `Repeat_Mode` aren't updated.
Block's decompressed content is defined solely by the Literals Section content.
__Symbol compression modes__
@@ -905,7 +929,10 @@ There is an exception though, when current sequence's `literals_length = 0`.
In this case, repeated offsets are shifted by one,
so an `offset_value` of 1 means `Repeated_Offset2`,
an `offset_value` of 2 means `Repeated_Offset3`,
and an `offset_value` of 3 means `Repeated_Offset1 - 1_byte`.
and an `offset_value` of 3 means `Repeated_Offset1 - 1`.
In the final case, if `Repeated_Offset1 - 1` evaluates to 0, then the
data is considered corrupted.
For the first block, the starting offset history is populated with following values :
`Repeated_Offset1`=1, `Repeated_Offset2`=4, `Repeated_Offset3`=8,
@@ -945,14 +972,14 @@ sequences are applied to them:
|:--------------:|:-----------------:|:------------------:|:------------------:|:------------------:|:-----------------------:|
| | | 1 | 4 | 8 | starting values |
| 1114 | 11 | 1111 | 1 | 4 | non-repeat |
| 1 | 22 | 1111 | 1 | 4 | repeat 1; no change |
| 1 | 22 | 1111 | 1 | 4 | repeat 1: no change |
| 2225 | 22 | 2222 | 1111 | 1 | non-repeat |
| 1114 | 111 | 1111 | 2222 | 1111 | non-repeat |
| 3336 | 33 | 3333 | 1111 | 2222 | non-repeat |
| 2 | 22 | 1111 | 3333 | 2222 | repeat 2; swap 1 & 2 |
| 3 | 33 | 2222 | 1111 | 3333 | repeat 3; rotate 3 to 1 |
| 3 | 0 | 2221 | 2222 | 1111 | insert resolved offset |
| 1 | 0 | 2222 | 2221 | 3333 | repeat 2 |
| 2 | 22 | 1111 | 3333 | 2222 | repeat 2: swap 1 & 2 |
| 3 | 33 | 2222 | 1111 | 3333 | repeat 3: rotate 3 to 1 |
| 3 | 0 | 2221 | 2222 | 1111 | special case : insert `repeat1 - 1` |
| 1 | 0 | 2222 | 2221 | 1111 | == repeat 2 |
Skippable Frames
@@ -967,14 +994,14 @@ into a flow of concatenated frames.
Skippable frames defined in this specification are compatible with [LZ4] ones.
[LZ4]:http://www.lz4.org
[LZ4]:https://lz4.github.io/lz4/
From a compliant decoder perspective, skippable frames need just be skipped,
and their content ignored, resuming decoding after the skippable frame.
It can be noted that a skippable frame
can be used to watermark a stream of concatenated frames
embedding any kind of tracking information (even just an UUID).
embedding any kind of tracking information (even just a UUID).
Users wary of such possibility should scan the stream of concatenated frames
in an attempt to detect such frame for analysis or removal.
@@ -1011,55 +1038,57 @@ and to compress Huffman headers.
FSE
---
FSE, short for Finite State Entropy, is an entropy codec based on [ANS].
FSE encoding/decoding involves a state that is carried over between symbols,
so decoding must be done in the opposite direction as encoding.
FSE encoding/decoding involves a state that is carried over between symbols.
Decoding must be done in the opposite direction as encoding.
Therefore, all FSE bitstreams are read from end to beginning.
Note that the order of the bits in the stream is not reversed,
we just read the elements in the reverse order they are written.
we just read each multi-bits element in the reverse order they are encoded.
For additional details on FSE, see [Finite State Entropy].
[Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/
FSE decoding involves a decoding table which has a power of 2 size, and contain three elements:
FSE decoding is directed by a decoding table with a power of 2 size, each row containing three elements:
`Symbol`, `Num_Bits`, and `Baseline`.
The `log2` of the table size is its `Accuracy_Log`.
An FSE state value represents an index in this table.
To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a __little-endian__ value.
The next symbol in the stream is the `Symbol` indicated in the table for that state.
The first symbol in the stream is the `Symbol` indicated in the table for that state.
To obtain the next state value,
the decoder should consume `Num_Bits` bits from the stream as a __little-endian__ value and add it to `Baseline`.
[ANS]: https://en.wikipedia.org/wiki/Asymmetric_Numeral_Systems
### FSE Table Description
To decode FSE streams, it is necessary to construct the decoding table.
The Zstandard format encodes FSE table descriptions as follows:
To decode an FSE bitstream, it is necessary to build its FSE decoding table.
The decoding table is derived from a distribution of Probabilities.
The Zstandard format encodes distributions of Probabilities as follows:
An FSE distribution table describes the probabilities of all symbols
from `0` to the last present one (included)
on a normalized scale of `1 << Accuracy_Log` .
Note that there must be two or more symbols with nonzero probability.
The distribution of probabilities is described in a bitstream which is read forward,
in __little-endian__ fashion.
The amount of bytes consumed from the bitstream to describe the distribution
is discovered at the end of the decoding process.
It's a bitstream which is read forward, in __little-endian__ fashion.
It's not necessary to know bitstream exact size,
it will be discovered and reported by the decoding process.
The bitstream starts by reporting on which scale it operates.
The bitstream starts by reporting on which scale the distribution operates.
Let's `low4Bits` designate the lowest 4 bits of the first byte :
`Accuracy_Log = low4bits + 5`.
Then follows each symbol value, from `0` to last present one.
The number of bits used by each field is variable.
An FSE distribution table describes the probabilities of all symbols
from `0` to the last present one (included) in natural order.
The sum of probabilities is normalized to reach a power of 2 total of `1 << Accuracy_Log` .
There must be two or more symbols with non-zero probabilities.
The number of bits used to decode each probability is variable.
It depends on :
- Remaining probabilities + 1 :
__example__ :
Presuming an `Accuracy_Log` of 8,
and presuming 100 probabilities points have already been distributed,
and presuming 100 probability points have already been distributed,
the decoder may read any value from `0` to `256 - 100 + 1 == 157` (inclusive).
Therefore, it must read `log2sup(157) == 8` bits.
Therefore, it may read up to `log2sup(157) == 8` bits, where `log2sup(N)`
is the smallest integer `T` that satisfies `(1 << T) > N`.
- Value decoded : small values use 1 less bit :
__example__ :
@@ -1070,111 +1099,133 @@ It depends on :
values from 98 to 157 use 8 bits.
This is achieved through this scheme :
| Value read | Value decoded | Number of bits used |
| ---------- | ------------- | ------------------- |
| 0 - 97 | 0 - 97 | 7 |
| 98 - 127 | 98 - 127 | 8 |
| 128 - 225 | 0 - 97 | 7 |
| 226 - 255 | 128 - 157 | 8 |
| 8-bit field read | Value decoded | Nb of bits consumed |
| ---------------- | ------------- | ------------------- |
| 0 - 97 | 0 - 97 | 7 |
| 98 - 127 | 98 - 127 | 8 |
| 128 - 225 | 0 - 97 | 7 |
| 226 - 255 | 128 - 157 | 8 |
Symbols probabilities are read one by one, in order.
Probability is derived from Value decoded using the following formula:
`Probality = Value - 1`
Probability is obtained from Value decoded by following formula :
`Proba = value - 1`
Consequently, a Probability of `0` is described by a Value `1`.
It means value `0` becomes negative probability `-1`.
`-1` is a special probability, which means "less than 1".
Its effect on distribution table is described in the [next section].
For the purpose of calculating total allocated probability points, it counts as one.
A Value `0` is used to signal a special case, named "Probability `-1`".
It describes a probability which should have been "less than 1".
Its effect on the decoding table building process is described in the [next section].
For the purpose of counting total allocated probability points, it counts as one.
[next section]:#from-normalized-distribution-to-decoding-tables
When a symbol has a __probability__ of `zero`,
Symbols probabilities are read one by one, in order.
After each probability is decoded, the total nb of probability points is updated.
This is used to determine how many bits must be read to decode the probability of next symbol.
When a symbol has a __probability__ of `zero` (decoded from reading a Value `1`),
it is followed by a 2-bits repeat flag.
This repeat flag tells how many probabilities of zeroes follow the current one.
It provides a number ranging from 0 to 3.
If it is a 3, another 2-bits repeat flag follows, and so on.
When last symbol reaches cumulated total of `1 << Accuracy_Log`,
decoding is complete.
If the last symbol makes cumulated total go above `1 << Accuracy_Log`,
distribution is considered corrupted.
When the Probability for a symbol makes cumulated total reach `1 << Accuracy_Log`,
then it's the last symbol, and decoding is complete.
Then the decoder can tell how many bytes were used in this process,
and how many symbols are present.
The bitstream consumes a round number of bytes.
Any remaining bit within the last byte is just unused.
If this process results in a non-zero probability for a symbol outside of the
valid range of symbols that the FSE table is defined for, even if that symbol is
not used, then the data is considered corrupted.
For the specific case of offset codes,
a decoder implementation may reject a frame containing a non-zero probability
for an offset code larger than the largest offset code supported by the decoder
implementation.
#### From normalized distribution to decoding tables
The distribution of normalized probabilities is enough
The normalized distribution of probabilities is enough
to create a unique decoding table.
It follows the following build rule :
It is generated using the following build rule :
The table has a size of `Table_Size = 1 << Accuracy_Log`.
Each cell describes the symbol decoded,
and instructions to get the next state (`Number_of_Bits` and `Baseline`).
Each row specifies the decoded symbol,
and instructions to reach the next state (`Number_of_Bits` and `Baseline`).
Symbols are scanned in their natural order for "less than 1" probabilities.
Symbols with this probability are being attributed a single cell,
Symbols are first scanned in their natural order for "less than 1" probabilities
(previously decoded from a Value of `0`).
Symbols with this special probability are being attributed a single row,
starting from the end of the table and retreating.
These symbols define a full state reset, reading `Accuracy_Log` bits.
Then, all remaining symbols, sorted in natural order, are allocated cells.
Starting from symbol `0` (if it exists), and table position `0`,
each symbol gets allocated as many cells as its probability.
Cell allocation is spread, not linear :
each successor position follows this rule :
Then, all remaining symbols, sorted in natural order, are allocated rows.
Starting from smallest present symbol, and table position `0`,
each symbol gets allocated as many rows as its probability.
Row allocation is not linear, it follows this order, in modular arithmetic:
```
position += (tableSize>>1) + (tableSize>>3) + 3;
position &= tableSize-1;
```
A position is skipped if already occupied by a "less than 1" probability symbol.
`position` does not reset between symbols, it simply iterates through
each position in the table, switching to the next symbol when enough
states have been allocated to the current one.
Using above ordering rule, each symbol gets allocated as many rows as its probability.
If a position is already occupied by a "less than 1" probability symbol,
it is simply skipped, and the next position is allocated instead.
Once enough rows have been allocated for the current symbol,
the allocation process continues, using the next symbol, in natural order.
This process guarantees that the table is entirely and exactly filled.
The process guarantees that the table is entirely filled.
Each cell corresponds to a state value, which contains the symbol being decoded.
Each row specifies a decoded symbol, and is accessed by current state value.
It also specifies `Number_of_Bits` and `Baseline`, which are required to determine next state value.
To add the `Number_of_Bits` and `Baseline` required to retrieve next state,
it's first necessary to sort all occurrences of each symbol in state order.
Lower states will need 1 more bit than higher ones.
The process is repeated for each symbol.
To correctly set these fields, it's necessary to sort all occurrences of each symbol in state value order,
and then attribute N+1 bits to lower rows, and N bits to higher rows,
following the process described below (using an example):
__Example__ :
Presuming a symbol has a probability of 5,
it receives 5 cells, corresponding to 5 state values.
These state values are then sorted in natural order.
Presuming an `Accuracy_Log` of 7,
let's imagine a symbol with a Probability of 5:
it receives 5 rows, corresponding to 5 state values between `0` and `127`.
Next power of 2 after 5 is 8.
Space of probabilities must be divided into 8 equal parts.
Presuming the `Accuracy_Log` is 7, it defines a space of 128 states.
Divided by 8, each share is 16 large.
In this example, the first state value happens to be `1` (after unspecified previous symbols).
The next 4 states are then determined using above modular arithmetic rule,
which specifies to add `64+16+3 = 83` modulo `128` to jump to next position,
producing the following series: `1`, `84`, `39`, `122`, `77` (modular arithmetic).
(note: the next symbol will then start at `32`).
In order to reach 8 shares, 8-5=3 lowest states will count "double",
These state values are then sorted in natural order,
resulting in the following series: `1`, `39`, `77`, `84`, `122`.
The next power of 2 after 5 is 8.
Therefore, the probability space will be divided into 8 equal parts.
Since the probability space is `1<<7 = 128` large, each share is `128/8 = 16` large.
In order to reach 8 shares, the `8-5 = 3` lowest states will count "double",
doubling their shares (32 in width), hence requiring one more bit.
Baseline is assigned starting from the higher states using fewer bits,
increasing at each state, then resuming at the first state,
each state takes its allocated width from Baseline.
Baseline is assigned starting from the lowest state using fewer bits,
continuing in natural state order, looping back at the beginning.
Each state takes its allocated range from Baseline, sized by its `Number_of_Bits`.
| state value | 1 | 39 | 77 | 84 | 122 |
| state order | 0 | 1 | 2 | 3 | 4 |
| ---------------- | ----- | ----- | ------ | ---- | ------ |
| state value | 1 | 39 | 77 | 84 | 122 |
| width | 32 | 32 | 32 | 16 | 16 |
| `Number_of_Bits` | 5 | 5 | 5 | 4 | 4 |
| range number | 2 | 4 | 6 | 0 | 1 |
| allocation order | 3 | 4 | 5 | 1 | 2 |
| `Baseline` | 32 | 64 | 96 | 0 | 16 |
| range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
During decoding, the next state value is determined from current state value,
by reading the required `Number_of_Bits`, and adding the specified `Baseline`.
During decoding, the next state value is determined by using current state value as row number,
then reading the required `Number_of_Bits` from the bitstream, and adding the specified `Baseline`.
See [Appendix A] for the results of this process applied to the default distributions.
Note:
as a trivial example, it follows that, for a symbol with a Probability of `1`,
`Baseline` is necessarily `0`, and `Number_of_Bits` is necessarily `Accuracy_Log`.
See [Appendix A] to see the outcome of this process applied to the default distributions.
[Appendix A]: #appendix-a---decoding-tables-for-predefined-codes
@@ -1183,7 +1234,7 @@ Huffman Coding
--------------
Zstandard Huffman-coded streams are read backwards,
similar to the FSE bitstreams.
Therefore, to find the start of the bitstream, it is therefore to
Therefore, to find the start of the bitstream, it is required to
know the offset of the last byte of the Huffman-coded stream.
After writing the last bit containing information, the compressor
@@ -1219,48 +1270,61 @@ This specification limits maximum code length to 11 bits.
#### Representation
All literal values from zero (included) to last present one (excluded)
All literal symbols from zero (included) to last present one (excluded)
are represented by `Weight` with values from `0` to `Max_Number_of_Bits`.
Transformation from `Weight` to `Number_of_Bits` follows this formula :
```
Number_of_Bits = Weight ? (Max_Number_of_Bits + 1 - Weight) : 0
```
The last symbol's `Weight` is deduced from previously decoded ones,
by completing to the nearest power of 2.
This power of 2 gives `Max_Number_of_Bits`, the depth of the current tree.
When a literal symbol is not present, it receives a `Weight` of 0.
The least frequent symbol receives a `Weight` of 1.
If no literal has a `Weight` of 1, then the data is considered corrupted.
If there are not at least two literals with non-zero `Weight`, then the data
is considered corrupted.
The most frequent symbol receives a `Weight` anywhere between 1 and 11 (max).
The last symbol's `Weight` is deduced from previously retrieved Weights,
by completing to the nearest power of 2. It's necessarily non 0.
If it's not possible to reach a clean power of 2 with a single `Weight` value,
the Huffman Tree Description is considered invalid.
This final power of 2 gives `Max_Number_of_Bits`, the depth of the current tree.
`Max_Number_of_Bits` must be <= 11,
otherwise the representation is considered corrupted.
__Example__ :
Let's presume the following Huffman tree must be described :
| literal value | 0 | 1 | 2 | 3 | 4 | 5 |
| literal symbol | A | B | C | D | E | F |
| ---------------- | --- | --- | --- | --- | --- | --- |
| `Number_of_Bits` | 1 | 2 | 3 | 0 | 4 | 4 |
The tree depth is 4, since its longest elements uses 4 bits
(longest elements are the one with smallest frequency).
Value `5` will not be listed, as it can be determined from values for 0-4,
nor will values above `5` as they are all 0.
Values from `0` to `4` will be listed using `Weight` instead of `Number_of_Bits`.
(longest elements are the ones with smallest frequency).
All symbols will now receive a `Weight` instead of `Number_of_Bits`.
Weight formula is :
```
Weight = Number_of_Bits ? (Max_Number_of_Bits + 1 - Number_of_Bits) : 0
```
It gives the following series of weights :
It gives the following series of Weights :
| literal value | 0 | 1 | 2 | 3 | 4 |
| ------------- | --- | --- | --- | --- | --- |
| `Weight` | 4 | 3 | 2 | 0 | 1 |
| literal symbol | A | B | C | D | E | F |
| -------------- | --- | --- | --- | --- | --- | --- |
| `Weight` | 4 | 3 | 2 | 0 | 1 | 1 |
This list will be sent to the decoder, with the following modifications:
- `F` will not be listed, because it can be determined from previous symbols
- nor will symbols above `F` as they are all 0
- on the other hand, all symbols before `A`, starting with `\0`, will be listed, with a Weight of 0.
The decoder will do the inverse operation :
having collected weights of literal symbols from `0` to `4`,
it knows the last literal, `5`, is present with a non-zero `Weight`.
The `Weight` of `5` can be determined by advancing to the next power of 2.
having collected weights of literal symbols from `A` to `E`,
it knows the last literal, `F`, is present with a non-zero `Weight`.
The `Weight` of `F` can be determined by advancing to the next power of 2.
The sum of `2^(Weight-1)` (excluding 0's) is :
`8 + 4 + 2 + 0 + 1 = 15`.
Nearest larger power of 2 value is 16.
Therefore, `Max_Number_of_Bits = 4` and `Weight[5] = 16-15 = 1`.
Therefore, `Max_Number_of_Bits = log2(16) = 4` and `Weight[F] = log_2(16 - 15) + 1 = 1`.
#### Huffman Tree header
@@ -1300,7 +1364,7 @@ sharing a single distribution table.
To decode an FSE bitstream, it is necessary to know its compressed size.
Compressed size is provided by `headerByte`.
It's also necessary to know its _maximum possible_ decompressed size,
which is `255`, since literal values span from `0` to `255`,
which is `255`, since literal symbols span from `0` to `255`,
and last symbol's `Weight` is not represented.
An FSE bitstream starts by a header, describing probabilities distribution.
@@ -1322,6 +1386,13 @@ If updating state after decoding a symbol would require more bits than
remain in the stream, it is assumed that extra bits are 0. Then,
symbols for each of the final states are decoded and the process is complete.
If this process would produce more weights than the maximum number of decoded
weights (255), then the data is considered corrupted.
If either of the 2 initial states are absent or truncated, then the data is
considered corrupted. Consequently, it is not possible to encode fewer than
2 weights using this mode.
#### Conversion from weights to Huffman prefix codes
All present symbols shall now have a `Weight` value.
@@ -1329,26 +1400,28 @@ It is possible to transform weights into `Number_of_Bits`, using this formula:
```
Number_of_Bits = (Weight>0) ? Max_Number_of_Bits + 1 - Weight : 0
```
Symbols are sorted by `Weight`.
Within same `Weight`, symbols keep natural sequential order.
In order to determine which prefix code is assigned to each Symbol,
Symbols are first sorted by `Weight`, then by natural sequential order.
Symbols with a `Weight` of zero are removed.
Then, starting from lowest `Weight`, prefix codes are distributed in sequential order.
Then, starting from lowest `Weight` (hence highest `Number_of_Bits`),
prefix codes are assigned in ascending order.
__Example__ :
Let's presume the following list of weights has been decoded :
Let's assume the following list of weights has been decoded:
| Literal | 0 | 1 | 2 | 3 | 4 | 5 |
| Literal | A | B | C | D | E | F |
| -------- | --- | --- | --- | --- | --- | --- |
| `Weight` | 4 | 3 | 2 | 0 | 1 | 1 |
Sorted by weight and then natural sequential order,
it gives the following distribution :
it gives the following prefix codes distribution:
| Literal | 3 | 4 | 5 | 2 | 1 | 0 |
| ---------------- | --- | --- | --- | --- | --- | ---- |
| `Weight` | 0 | 1 | 1 | 2 | 3 | 4 |
| `Number_of_Bits` | 0 | 4 | 4 | 3 | 2 | 1 |
| prefix codes | N/A | 0000| 0001| 001 | 01 | 1 |
| Literal | D | E | F | C | B | A |
| ---------------- | --- | ---- | ---- | ---- | ---- | ---- |
| `Weight` | 0 | 1 | 1 | 2 | 3 | 4 |
| `Number_of_Bits` | 0 | 4 | 4 | 3 | 2 | 1 |
| prefix code | N/A | 0000 | 0001 | 001 | 01 | 1 |
| ascending order | N/A | 0000 | 0001 | 001x | 01xx | 1xxx |
### Huffman-coded Streams
@@ -1371,10 +1444,10 @@ it's possible to read the bitstream in a __little-endian__ fashion,
keeping track of already used bits. Since the bitstream is encoded in reverse
order, starting from the end read symbols in forward order.
For example, if the literal sequence "0145" was encoded using above prefix code,
For example, if the literal sequence `ABEF` was encoded using above prefix code,
it would be encoded (in reverse order) as:
|Symbol | 5 | 4 | 1 | 0 | Padding |
|Symbol | F | E | B | A | Padding |
|--------|------|------|----|---|---------|
|Encoding|`0000`|`0001`|`01`|`1`| `00001` |
@@ -1669,6 +1742,12 @@ or at least provide a meaningful error code explaining for which reason it canno
Version changes
---------------
- 0.4.3 : clarifications for Huffman prefix code assignment example
- 0.4.2 : refactor FSE table construction process, inspired by Donald Pian
- 0.4.1 : clarifications on a few error scenarios, by Eric Lasota
- 0.4.0 : fixed imprecise behavior for nbSeq==0, detected by Igor Pavlov
- 0.3.9 : clarifications for Huffman-compressed literal sizes.
- 0.3.8 : clarifications for Huffman Blocks and Huffman Tree descriptions.
- 0.3.7 : clarifications for Repeat_Offsets, matching RFC8878
- 0.3.6 : clarifications for Dictionary_ID
- 0.3.5 : clarifications for Block_Maximum_Size
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,5 +1,5 @@
# ################################################################
# Copyright (c) Yann Collet, Facebook, Inc.
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
+22 -10
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -21,6 +21,17 @@
#include <sys/stat.h> // stat
#include <zstd.h>
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__)
# define UNUSED_ATTR __attribute__((unused))
#else
# define UNUSED_ATTR
#endif
#define HEADER_FUNCTION static UNUSED_ATTR
/*
* Define the returned error code from utility functions.
*/
@@ -68,7 +79,7 @@ typedef enum {
*
* @return The size of a given file path.
*/
static size_t fsize_orDie(const char *filename)
HEADER_FUNCTION size_t fsize_orDie(const char *filename)
{
struct stat st;
if (stat(filename, &st) != 0) {
@@ -96,7 +107,7 @@ static size_t fsize_orDie(const char *filename)
* @return If successful this function will return a FILE pointer to an
* opened file otherwise it sends an error to stderr and exits.
*/
static FILE* fopen_orDie(const char *filename, const char *instruction)
HEADER_FUNCTION FILE* fopen_orDie(const char *filename, const char *instruction)
{
FILE* const inFile = fopen(filename, instruction);
if (inFile) return inFile;
@@ -108,7 +119,7 @@ static FILE* fopen_orDie(const char *filename, const char *instruction)
/*! fclose_orDie() :
* Close an opened file using given FILE pointer.
*/
static void fclose_orDie(FILE* file)
HEADER_FUNCTION void fclose_orDie(FILE* file)
{
if (!fclose(file)) { return; };
/* error */
@@ -123,7 +134,7 @@ static void fclose_orDie(FILE* file)
*
* @return The number of bytes read.
*/
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
HEADER_FUNCTION size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
size_t const readSize = fread(buffer, 1, sizeToRead, file);
if (readSize == sizeToRead) return readSize; /* good */
@@ -143,7 +154,7 @@ static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
*
* @return The number of bytes written.
*/
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
HEADER_FUNCTION size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
@@ -159,7 +170,7 @@ static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
* cated memory. If there is an error, this function will send that
* error to stderr and exit.
*/
static void* malloc_orDie(size_t size)
HEADER_FUNCTION void* malloc_orDie(size_t size)
{
void* const buff = malloc(size);
if (buff) return buff;
@@ -177,7 +188,7 @@ static void* malloc_orDie(size_t size)
* @return If successful this function will load file into buffer and
* return file size, otherwise it will printout an error to stderr and exit.
*/
static size_t loadFile_orDie(const char* fileName, void* buffer, size_t bufferSize)
HEADER_FUNCTION size_t loadFile_orDie(const char* fileName, void* buffer, size_t bufferSize)
{
size_t const fileSize = fsize_orDie(fileName);
CHECK(fileSize <= bufferSize, "File too large!");
@@ -201,7 +212,8 @@ static size_t loadFile_orDie(const char* fileName, void* buffer, size_t bufferSi
* @return If successful this function will return buffer and bufferSize(=fileSize),
* otherwise it will printout an error to stderr and exit.
*/
static void* mallocAndLoadFile_orDie(const char* fileName, size_t* bufferSize) {
HEADER_FUNCTION void* mallocAndLoadFile_orDie(const char* fileName, size_t* bufferSize)
{
size_t const fileSize = fsize_orDie(fileName);
*bufferSize = fileSize;
void* const buffer = malloc_orDie(*bufferSize);
@@ -217,7 +229,7 @@ static void* mallocAndLoadFile_orDie(const char* fileName, size_t* bufferSize) {
* Note: This function will send an error to stderr and exit if it
* cannot write to a given file.
*/
static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize)
HEADER_FUNCTION void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize)
{
FILE* const oFile = fopen_orDie(fileName, "wb");
size_t const wSize = fwrite(buff, 1, buffSize, oFile);
@@ -1,12 +1,22 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
**/
/* This example deals with Dictionary compression,
* its counterpart is `examples/dictionary_decompression.c` .
* These examples presume that a dictionary already exists.
* The main method to create a dictionary is `zstd --train`,
* look at the CLI documentation for details.
* Another possible method is to employ dictionary training API,
* published in `lib/zdict.h` .
**/
#include <stdio.h> // printf
#include <stdlib.h> // free
#include <string.h> // memset, strcat
@@ -14,7 +24,7 @@
#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
/* createDict() :
`dictFileName` is supposed to have been created using `zstd --train` */
** `dictFileName` is supposed already created using `zstd --train` */
static ZSTD_CDict* createCDict_orDie(const char* dictFileName, int cLevel)
{
size_t dictSize;
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -42,7 +42,13 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve
*/
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) );
CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads);
if (nbThreads > 1) {
size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads);
if (ZSTD_isError(r)) {
fprintf (stderr, "Note: the linked libzstd library doesn't support multithreading. "
"Reverting to single-thread mode. \n");
}
}
/* This loop read from the input file, compresses that entire chunk,
* and writes all output produced to the output file.
@@ -117,7 +123,7 @@ int main(int argc, const char** argv)
}
int cLevel = 1;
int nbThreads = 4;
int nbThreads = 1;
if (argc >= 3) {
cLevel = atoi (argv[2]);
@@ -1,5 +1,5 @@
/*
* Copyright (c) Martin Liska, SUSE, Facebook, Inc.
* Copyright (c) Martin Liska, SUSE, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
+67 -35
View File
@@ -1,5 +1,5 @@
# ################################################################
# Copyright (c) Yann Collet, Facebook, Inc.
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,9 @@
# You may select, at your option, one of the above-listed licenses.
# ################################################################
# default target (when running `make` with no argument)
lib-release:
# Modules
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1
@@ -54,13 +57,14 @@ VERSION := $(ZSTD_VERSION)
# Note: by default, the static library is built single-threaded and dynamic library is built
# multi-threaded. It is possible to force multi or single threaded builds by appending
# -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded).
.PHONY: default
default: lib-release
CPPFLAGS_DYNLIB += -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded
LDFLAGS_DYNLIB += -pthread
CPPFLAGS_STATLIB += # static library build defaults to single-threaded
CPPFLAGS_STATICLIB += # static library build defaults to single-threaded
# pkg-config Libs.private points to LDFLAGS_DYNLIB
PCLIB := $(LDFLAGS_DYNLIB)
ifeq ($(findstring GCC,$(CCVER)),GCC)
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
@@ -69,13 +73,15 @@ endif
# macOS linker doesn't support -soname, and use different extension
# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
ifeq ($(UNAME), Darwin)
UNAME_TARGET_SYSTEM ?= $(UNAME)
ifeq ($(UNAME_TARGET_SYSTEM), Darwin)
SHARED_EXT = dylib
SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
else
ifeq ($(UNAME), AIX)
ifeq ($(UNAME_TARGET_SYSTEM), AIX)
SONAME_FLAGS =
else
SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
@@ -91,7 +97,7 @@ all: lib
.PHONY: libzstd.a # must be run every time
libzstd.a: CPPFLAGS += $(CPPFLAGS_STATLIB)
libzstd.a: CPPFLAGS += $(CPPFLAGS_STATICLIB)
SET_CACHE_DIRECTORY = \
+$(MAKE) --no-print-directory $@ \
@@ -109,19 +115,19 @@ libzstd.a:
else
# BUILD_DIR is defined
ZSTD_STATLIB_DIR := $(BUILD_DIR)/static
ZSTD_STATLIB := $(ZSTD_STATLIB_DIR)/libzstd.a
ZSTD_STATLIB_OBJ := $(addprefix $(ZSTD_STATLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
$(ZSTD_STATLIB): ARFLAGS = rcs
$(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR)
$(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ)
ZSTD_STATICLIB_DIR := $(BUILD_DIR)/static
ZSTD_STATICLIB := $(ZSTD_STATICLIB_DIR)/libzstd.a
ZSTD_STATICLIB_OBJ := $(addprefix $(ZSTD_STATICLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
$(ZSTD_STATICLIB): ARFLAGS = rcs
$(ZSTD_STATICLIB): | $(ZSTD_STATICLIB_DIR)
$(ZSTD_STATICLIB): $(ZSTD_STATICLIB_OBJ)
# Check for multithread flag at target execution time
$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
@echo compiling multi-threaded static library $(LIBVER),\
@echo compiling single-threaded static library $(LIBVER))
$(AR) $(ARFLAGS) $@ $^
libzstd.a: $(ZSTD_STATLIB)
libzstd.a: $(ZSTD_STATICLIB)
cp -f $< $@
endif
@@ -182,14 +188,17 @@ lib : libzstd.a libzstd
# make does not consider implicit pattern rule for .PHONY target
%-mt : CPPFLAGS_DYNLIB := -DZSTD_MULTITHREAD
%-mt : CPPFLAGS_STATLIB := -DZSTD_MULTITHREAD
%-mt : CPPFLAGS_STATICLIB := -DZSTD_MULTITHREAD
%-mt : LDFLAGS_DYNLIB := -pthread
%-mt : PCLIB :=
%-mt : PCMTLIB := $(LDFLAGS_DYNLIB)
%-mt : %
@echo multi-threaded build completed
%-nomt : CPPFLAGS_DYNLIB :=
%-nomt : LDFLAGS_DYNLIB :=
%-nomt : CPPFLAGS_STATLIB :=
%-nomt : CPPFLAGS_STATICLIB :=
%-nomt : PCLIB :=
%-nomt : %
@echo single-threaded build completed
@@ -200,42 +209,52 @@ lib : libzstd.a libzstd
# Generate .h dependencies automatically
DEPFLAGS = -MT $@ -MMD -MP -MF
# -MMD: compiler generates dependency information as a side-effect of compilation, without system headers
# -MP: adds phony target for each dependency other than main file.
DEPFLAGS = -MMD -MP
$(ZSTD_DYNLIB_DIR)/%.o : %.c $(ZSTD_DYNLIB_DIR)/%.d | $(ZSTD_DYNLIB_DIR)
# ensure that ZSTD_DYNLIB_DIR exists prior to generating %.o
$(ZSTD_DYNLIB_DIR)/%.o : %.c | $(ZSTD_DYNLIB_DIR)
@echo CC $@
$(COMPILE.c) $(DEPFLAGS) $(ZSTD_DYNLIB_DIR)/$*.d $(OUTPUT_OPTION) $<
$(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $<
$(ZSTD_STATLIB_DIR)/%.o : %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR)
$(ZSTD_STATICLIB_DIR)/%.o : %.c | $(ZSTD_STATICLIB_DIR)
@echo CC $@
$(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATLIB_DIR)/$*.d $(OUTPUT_OPTION) $<
$(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $<
$(ZSTD_DYNLIB_DIR)/%.o : %.S | $(ZSTD_DYNLIB_DIR)
@echo AS $@
$(COMPILE.S) $(OUTPUT_OPTION) $<
$(ZSTD_STATLIB_DIR)/%.o : %.S | $(ZSTD_STATLIB_DIR)
$(ZSTD_STATICLIB_DIR)/%.o : %.S | $(ZSTD_STATICLIB_DIR)
@echo AS $@
$(COMPILE.S) $(OUTPUT_OPTION) $<
MKDIR ?= mkdir
$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR):
$(MKDIR) -p $@
MKDIR ?= mkdir -p
$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATICLIB_DIR):
$(MKDIR) $@
DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATLIB_OBJ:.o=.d)
DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATICLIB_OBJ:.o=.d)
$(DEPFILES):
include $(wildcard $(DEPFILES))
# The leading '-' means: do not fail is include fails (ex: directory does not exist yet)
-include $(wildcard $(DEPFILES))
# Special case : building library in single-thread mode _and_ without zstdmt_compress.c
ZSTDMT_FILES = compress/zstdmt_compress.c
ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))
# Special case : build library in single-thread mode _and_ without zstdmt_compress.c
# Note : we still need threading.c and pool.c for the dictionary builder,
# but they will correctly behave single-threaded.
ZSTDMT_FILES = zstdmt_compress.c
ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(notdir $(ZSTD_FILES)))
libzstd-nomt: CFLAGS += -fPIC -fvisibility=hidden
libzstd-nomt: LDFLAGS += -shared
libzstd-nomt: $(ZSTD_NOMT_FILES)
@echo compiling single-thread dynamic library $(LIBVER)
@echo files : $(ZSTD_NOMT_FILES)
@if echo "$(ZSTD_NOMT_FILES)" | tr ' ' '\n' | $(GREP) -q zstdmt; then \
echo "Error: Found zstdmt in list."; \
exit 1; \
fi
$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
.PHONY: clean
@@ -249,7 +268,7 @@ clean:
#-----------------------------------------------------------------------------
# make install is validated only for below listed environments
#-----------------------------------------------------------------------------
ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX))
ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT% CYGWIN_NT%,$(UNAME)))
lib: libzstd.pc
@@ -280,13 +299,21 @@ PCLIBPREFIX := $(if $(findstring $(LIBDIR),$(PCLIBDIR)),,$${exec_prefix})
# to PREFIX, rather than as a resolved value.
PCEXEC_PREFIX := $(if $(HAS_EXPLICIT_EXEC_PREFIX),$(EXEC_PREFIX),$${prefix})
ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly))
ifneq ($(MT),)
PCLIB :=
PCMTLIB := $(LDFLAGS_DYNLIB)
else
PCLIB := $(LDFLAGS_DYNLIB)
endif
ifneq (,$(filter FreeBSD NetBSD DragonFly,$(UNAME)))
PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
else
PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
endif
ifneq (,$(filter $(UNAME),SunOS))
ifneq (,$(filter SunOS,$(UNAME)))
INSTALL ?= ginstall
else
INSTALL ?= install
@@ -296,15 +323,20 @@ INSTALL_PROGRAM ?= $(INSTALL)
INSTALL_DATA ?= $(INSTALL) -m 644
# pkg-config library define.
# For static single-threaded library declare -pthread in Libs.private
# For static multi-threaded library declare -pthread in Libs and Cflags
.PHONY: libzstd.pc
libzstd.pc: libzstd.pc.in
@echo creating pkgconfig
@sed $(SED_ERE_OPT) \
@sed \
-e 's|@PREFIX@|$(PREFIX)|' \
-e 's|@EXEC_PREFIX@|$(PCEXEC_PREFIX)|' \
-e 's|@INCLUDEDIR@|$(PCINCPREFIX)$(PCINCDIR)|' \
-e 's|@LIBDIR@|$(PCLIBPREFIX)$(PCLIBDIR)|' \
-e 's|@VERSION@|$(VERSION)|' \
-e 's|@LIBS_PRIVATE@|$(LDFLAGS_DYNLIB)|' \
-e 's|@LIBS_MT@|$(PCMTLIB)|' \
-e 's|@LIBS_PRIVATE@|$(PCLIB)|' \
$< >$@
.PHONY: install
+33 -2
View File
@@ -27,12 +27,16 @@ Enabling multithreading requires 2 conditions :
For convenience, we provide a build target to generate multi and single threaded libraries:
- Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
Note that the `.pc` generated on calling `make lib-mt` will already include the require Libs and Cflags.
- Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
- By default, as mentioned before, dynamic library is multithreaded, and static library is single-threaded, e.g. `make lib`.
When linking a POSIX program with a multithreaded version of `libzstd`,
note that it's necessary to invoke the `-pthread` flag during link stage.
The `.pc` generated from `make install` or `make install-pc` always assume a single-threaded static library
is compiled. To correctly generate a `.pc` for the multi-threaded static library, set `MT=1` as ENV variable.
Multithreading capabilities are exposed
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
@@ -88,10 +92,10 @@ The file structure is designed to make this selection manually achievable for an
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
- While invoking `make libzstd`, it's possible to define build macros
`ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
`ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
corresponding features. This will also disable compilation of all
dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
dependencies (e.g. `ZSTD_LIB_COMPRESSION=0` will also disable
dictBuilder).
- There are a number of options that can help minimize the binary size of
@@ -119,6 +123,15 @@ The file structure is designed to make this selection manually achievable for an
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
`ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
On the compressor side, Zstd's compression levels map to several internal
strategies. In environments where the higher compression levels aren't used,
it is possible to exclude all but the fastest strategy with
`ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1`. (Note that this will change
the behavior of the default compression level.) Or if you want to retain the
default compressor as well, you can set
`ZSTD_LIB_EXCLUDE_COMPRESSORS_GREEDY_AND_UP=1`, at the cost of an additional
~20KB or so.
For squeezing the last ounce of size out, you can also define
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
which removes the error messages that are otherwise returned by
@@ -136,6 +149,13 @@ The file structure is designed to make this selection manually achievable for an
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
the shared library, which is now hidden by default.
- The build macro `STATIC_BMI2` can be set to 1 to force usage of `bmi2` instructions.
It is generally not necessary to set this build macro,
because `STATIC_BMI2` will be automatically set to 1
on detecting the presence of the corresponding instruction set in the compilation target.
It's nonetheless available as an optional manual toggle for better control,
and can also be used to forcefully disable `bmi2` instructions by setting it to 0.
- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
which can detect at runtime the presence of BMI2 instructions, and use them only if present.
These instructions contribute to better performance, notably on the decoder side.
@@ -161,6 +181,17 @@ The file structure is designed to make this selection manually achievable for an
`ZSTD_DCtx` decompression contexts,
but might also result in a small decompression speed cost.
- The C compiler macros `ZSTDLIB_VISIBLE`, `ZSTDERRORLIB_VISIBLE` and `ZDICTLIB_VISIBLE`
can be overridden to control the visibility of zstd's API. Additionally,
`ZSTDLIB_STATIC_API` and `ZDICTLIB_STATIC_API` can be overridden to control the visibility
of zstd's static API. Specifically, it can be set to `ZSTDLIB_HIDDEN` to hide the symbols
from the shared library. These macros default to `ZSTDLIB_VISIBILITY`,
`ZSTDERRORLIB_VSIBILITY`, and `ZDICTLIB_VISIBILITY` if unset, for backwards compatibility
with the old macro names.
- The C compiler macro `HUF_DISABLE_FAST_DECODE` disables the newer Huffman fast C
and assembly decoding loops. You may want to use this macro if these loops are
slower on your platform.
#### Windows : using MinGW+MSYS to create DLL
+55
View File
@@ -0,0 +1,55 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file provides custom allocation primitives
*/
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "compiler.h" /* MEM_STATIC */
#define ZSTD_STATIC_LINKING_ONLY
#include "../zstd.h" /* ZSTD_customMem */
#ifndef ZSTD_ALLOCATIONS_H
#define ZSTD_ALLOCATIONS_H
/* custom memory allocation functions */
MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return ZSTD_malloc(size);
}
MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
ZSTD_memset(ptr, 0, size);
return ptr;
}
return ZSTD_calloc(1, size);
}
MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
ZSTD_free(ptr);
}
}
#endif /* ZSTD_ALLOCATIONS_H */
+205
View File
@@ -0,0 +1,205 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_BITS_H
#define ZSTD_BITS_H
#include "mem.h"
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
assert(val != 0);
{
static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9};
return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
}
}
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
assert(val != 0);
#if defined(_MSC_VER)
# if STATIC_BMI2
return (unsigned)_tzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward(&r, val);
return (unsigned)r;
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_ctz(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_ctz(val);
#else
return ZSTD_countTrailingZeros32_fallback(val);
#endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
{
assert(val != 0);
{
static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31};
val |= val >> 1;
val |= val >> 2;
val |= val >> 4;
val |= val >> 8;
val |= val >> 16;
return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
}
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
assert(val != 0);
#if defined(_MSC_VER)
# if STATIC_BMI2
return (unsigned)_lzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)(31 - r);
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_clz(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_clz(val);
#else
return ZSTD_countLeadingZeros32_fallback(val);
#endif
}
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return (unsigned)_tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, val);
return (unsigned)r;
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
return (unsigned)__builtin_ctzll(val);
#elif defined(__ICCARM__)
return (unsigned)__builtin_ctzll(val);
#else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (leastSignificantWord == 0) {
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
} else {
return ZSTD_countTrailingZeros32(leastSignificantWord);
}
}
#endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return (unsigned)_lzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanReverse64(&r, val);
return (unsigned)(63 - r);
} else {
__assume(0); /* Should not reach this code path */
}
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)(__builtin_clzll(val));
#elif defined(__ICCARM__)
return (unsigned)(__builtin_clzll(val));
#else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (mostSignificantWord == 0) {
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
} else {
return ZSTD_countLeadingZeros32(mostSignificantWord);
}
}
#endif
}
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
return ZSTD_countTrailingZeros64((U64)val) >> 3;
} else {
return ZSTD_countTrailingZeros32((U32)val) >> 3;
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
return ZSTD_countLeadingZeros64((U64)val) >> 3;
} else {
return ZSTD_countLeadingZeros32((U32)val) >> 3;
}
}
}
MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
return 31 - ZSTD_countLeadingZeros32(val);
}
/* ZSTD_rotateRight_*():
* Rotates a bitfield to the right by "count" bits.
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
*/
MEM_STATIC
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
assert(count < 64);
count &= 0x3F; /* for fickle pattern recognition */
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
}
MEM_STATIC
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
assert(count < 32);
count &= 0x1F; /* for fickle pattern recognition */
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
}
MEM_STATIC
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
assert(count < 16);
count &= 0x0F; /* for fickle pattern recognition */
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
}
#endif /* ZSTD_BITS_H */
+84 -108
View File
@@ -1,7 +1,7 @@
/* ******************************************************************
* bitstream
* Part of FSE library
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -14,9 +14,6 @@
#ifndef BITSTREAM_H_MODULE
#define BITSTREAM_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/*
* This API consists of small unitary functions, which must be inlined for best performance.
* Since link-time-optimization is not available for all compilers,
@@ -30,14 +27,14 @@ extern "C" {
#include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
#include "bits.h" /* ZSTD_highbit32 */
/*=========================================
* Target specific
=========================================*/
#ifndef ZSTD_NO_INTRINSICS
# if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
# elif defined(__ICCARM__)
# include <intrinsics.h>
# endif
@@ -51,12 +48,13 @@ extern "C" {
/*-******************************************
* bitStream encoding API (write forward)
********************************************/
typedef size_t BitContainerType;
/* bitStream can mix input from multiple sources.
* A critical property of these streams is that they encode and decode in **reverse** direction.
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
*/
typedef struct {
size_t bitContainer;
BitContainerType bitContainer;
unsigned bitPos;
char* startPtr;
char* ptr;
@@ -64,7 +62,7 @@ typedef struct {
} BIT_CStream_t;
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
@@ -73,7 +71,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
*
* bits are first added to a local register.
* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
* Writing data into memory is an explicit operation, performed by the flushBits function.
* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
* After a flushBits, a maximum of 7 bits might still be stored into local register.
@@ -90,28 +88,28 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
* bitStream decoding API (read backward)
**********************************************/
typedef struct {
size_t bitContainer;
BitContainerType bitContainer;
unsigned bitsConsumed;
const char* ptr;
const char* start;
const char* limitPtr;
} BIT_DStream_t;
typedef enum { BIT_DStream_unfinished = 0,
BIT_DStream_endOfBuffer = 1,
BIT_DStream_completed = 2,
BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
} BIT_DStream_status; /* result of BIT_reloadDStream() */
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
/* Start by invoking BIT_initDStream().
* A chunk of the bitStream is then stored into a local register.
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
* You can then retrieve bitFields stored into the local register, **in reverse order**.
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
@@ -123,7 +121,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
/*-****************************************
* unsafe API
******************************************/
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
@@ -132,48 +130,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster, but works only if nbBits >= 1 */
/*-**************************************************************
* Internal functions
****************************************************************/
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
# if STATIC_BMI2 == 1
return _lzcnt_u32(val) ^ 31;
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
# endif
}
}
/*===== Local Constants =====*/
static const unsigned BIT_mask[] = {
0, 1, 3, 7, 0xF, 0x1F,
@@ -203,16 +159,31 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
return 0;
}
FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
{
#if STATIC_BMI2 && !defined(ZSTD_NO_INTRINSICS)
# if (defined(__x86_64__) || defined(_M_X64)) && !defined(__ILP32__)
return _bzhi_u64(bitContainer, nbBits);
# else
DEBUG_STATIC_ASSERT(sizeof(bitContainer) == sizeof(U32));
return _bzhi_u32(bitContainer, nbBits);
# endif
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_addBits() :
* can add up to 31 bits into `bitC`.
* Note : does not check for register overflow ! */
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
BitContainerType value, unsigned nbBits)
{
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
bitC->bitPos += nbBits;
}
@@ -220,7 +191,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
* works only if `value` is _clean_,
* meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
BitContainerType value, unsigned nbBits)
{
assert((value>>nbBits) == 0);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
@@ -267,7 +238,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
BIT_addBitsFast(bitC, 1, 1); /* endMark */
BIT_flushBits(bitC);
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
}
@@ -291,35 +262,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
bitD->bitContainer = MEM_readLEST(bitD->ptr);
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
} else {
bitD->ptr = bitD->start;
bitD->bitContainer = *(const BYTE*)(bitD->start);
switch(srcSize)
{
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
ZSTD_FALLTHROUGH;
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
ZSTD_FALLTHROUGH;
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
ZSTD_FALLTHROUGH;
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
ZSTD_FALLTHROUGH;
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
ZSTD_FALLTHROUGH;
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
ZSTD_FALLTHROUGH;
default: break;
}
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
}
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -328,12 +299,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize;
}
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{
return bitContainer >> start;
}
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -343,30 +314,20 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
* such cpus old (pre-Haswell, 2013) and their performance is not of that
* importance.
*/
#if defined(__x86_64__) || defined(_M_X86)
#if defined(__x86_64__) || defined(_M_X64)
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
}
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
return _bzhi_u64(bitContainer, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_lookBits() :
* Provides next n bits from local register.
* local register is not modified.
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
@@ -382,14 +343,14 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U3
/*! BIT_lookBitsFast() :
* unsafe version; only works if nbBits >= 1 */
MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
{
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
assert(nbBits >= 1);
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
bitD->bitsConsumed += nbBits;
}
@@ -398,23 +359,38 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BitContainerType const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
return value;
}
/*! BIT_readBitsFast() :
* unsafe version; only works only if nbBits >= 1 */
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
* unsafe version; only works if nbBits >= 1 */
MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBitsFast(bitD, nbBits);
BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
assert(nbBits >= 1);
BIT_skipBits(bitD, nbBits);
return value;
}
/*! BIT_reloadDStream_internal() :
* Simple variant of BIT_reloadDStream(), with two conditions:
* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
* 2. look window is valid after shifted down : bitD->ptr >= bitD->start
*/
MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
{
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
bitD->ptr -= bitD->bitsConsumed >> 3;
assert(bitD->ptr >= bitD->start);
bitD->bitsConsumed &= 7;
bitD->bitContainer = MEM_readLEST(bitD->ptr);
return BIT_DStream_unfinished;
}
/*! BIT_reloadDStreamFast() :
* Similar to BIT_reloadDStream(), but with two differences:
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@@ -425,31 +401,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
{
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
return BIT_DStream_overflow;
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
bitD->ptr -= bitD->bitsConsumed >> 3;
bitD->bitsConsumed &= 7;
bitD->bitContainer = MEM_readLEST(bitD->ptr);
return BIT_DStream_unfinished;
return BIT_reloadDStream_internal(bitD);
}
/*! BIT_reloadDStream() :
* Refill `bitD` from buffer previously set in BIT_initDStream() .
* This function is safe, it guarantees it will not read beyond src buffer.
* This function is safe, it guarantees it will not never beyond src buffer.
* @return : status of `BIT_DStream_t` internal register.
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
/* note : once in overflow mode, a bitstream remains in this mode until it's reset */
if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
static const BitContainerType zeroFilled = 0;
bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
/* overflow detected, erroneous scenario or end of stream: no update */
return BIT_DStream_overflow;
}
assert(bitD->ptr >= bitD->start);
if (bitD->ptr >= bitD->limitPtr) {
return BIT_reloadDStreamFast(bitD);
return BIT_reloadDStream_internal(bitD);
}
if (bitD->ptr == bitD->start) {
/* reached end of bitStream => no update */
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
return BIT_DStream_completed;
}
/* start < ptr < limitPtr */
/* start < ptr < limitPtr => cautious update */
{ U32 nbBytes = bitD->bitsConsumed >> 3;
BIT_DStream_status result = BIT_DStream_unfinished;
if (bitD->ptr - nbBytes < bitD->start) {
@@ -471,8 +451,4 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
}
#if defined (__cplusplus)
}
#endif
#endif /* BITSTREAM_H_MODULE */
+170 -41
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,6 +11,8 @@
#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H
#include <stddef.h>
#include "portability_macros.h"
/*-*******************************************************
@@ -25,7 +27,7 @@
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@@ -51,12 +53,19 @@
# define WIN_CDECL
#endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define UNUSED_ATTR __attribute__((unused))
#else
# define UNUSED_ATTR
#endif
/**
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to eliminate the constant
* branches.
*/
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
/**
* HINT_INLINE is used to help the compiler generate better code. It is *not*
* used for "templates", so it can be tweaked based on the compilers
@@ -71,21 +80,37 @@
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
# define HINT_INLINE static INLINE_KEYWORD
#else
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
# define HINT_INLINE FORCE_INLINE_TEMPLATE
#endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
/* "soft" inline :
* The compiler is free to select if it's a good idea to inline or not.
* The main objective is to silence compiler warnings
* when a defined function in included but not used.
*
* Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
* Updating the prefix is probably preferable, but requires a fairly large codemod,
* since this name is used everywhere.
*/
#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
#if defined(__GNUC__)
# define UNUSED_ATTR __attribute__((unused))
# define MEM_STATIC static __inline UNUSED_ATTR
#elif defined(__IAR_SYSTEMS_ICC__)
# define MEM_STATIC static inline UNUSED_ATTR
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define MEM_STATIC static inline
#elif defined(_MSC_VER)
# define MEM_STATIC static __inline
#else
# define UNUSED_ATTR
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
#endif
/* force no inlining */
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
#else
# if defined(__GNUC__) || defined(__ICCARM__)
# if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define FORCE_NOINLINE static __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
@@ -94,7 +119,7 @@
/* target attribute */
#if defined(__GNUC__) || defined(__ICCARM__)
#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
@@ -109,10 +134,10 @@
/* prefetch
* can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH)
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
@@ -120,24 +145,25 @@
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# elif defined(__aarch64__)
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
# else
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
# endif
#endif /* NO_PREFETCH */
#define CACHELINE_SIZE 64
#define PREFETCH_AREA(p, s) { \
const char* const _ptr = (const char*)(p); \
size_t const _size = (size_t)(s); \
size_t _pos; \
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
PREFETCH_L2(_ptr + _pos); \
} \
}
#define PREFETCH_AREA(p, s) \
do { \
const char* const _ptr = (const char*)(p); \
size_t const _size = (size_t)(s); \
size_t _pos; \
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
PREFETCH_L2(_ptr + _pos); \
} \
} while (0)
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
@@ -165,6 +191,12 @@
#define UNLIKELY(x) (x)
#endif
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
#else
# define ZSTD_UNREACHABLE do { assert(0); } while (0)
#endif
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */
@@ -175,28 +207,21 @@
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
#endif
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
#ifndef STATIC_BMI2
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
# define STATIC_BMI2 1
# endif
# endif
#endif
#ifndef STATIC_BMI2
#define STATIC_BMI2 0
#endif
/* compile time determination of SIMD support */
#if !defined(ZSTD_NO_INTRINSICS)
# if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
# if defined(__AVX2__)
# define ZSTD_ARCH_X86_AVX2
# endif
# if defined(__SSE2__) || defined(_M_X64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
# define ZSTD_ARCH_X86_SSE2
# endif
# if defined(__ARM_NEON) || defined(_M_ARM64)
# define ZSTD_ARCH_ARM_NEON
# endif
#
# if defined(ZSTD_ARCH_X86_AVX2)
# include <immintrin.h>
# endif
# if defined(ZSTD_ARCH_X86_SSE2)
# include <emmintrin.h>
# elif defined(ZSTD_ARCH_ARM_NEON)
@@ -241,9 +266,15 @@
#endif
/*-**************************************************************
* Alignment check
* Alignment
*****************************************************************/
/* @return 1 if @u is a 2^n value, 0 otherwise
* useful to check a value is valid for alignment restrictions */
MEM_STATIC int ZSTD_isPower2(size_t u) {
return (u & (u-1)) == 0;
}
/* this test was initially positioned in mem.h,
* but this file is removed (or replaced) for linux kernel
* so it's now hosted in compiler.h,
@@ -269,11 +300,105 @@
# endif
#endif /* ZSTD_ALIGNOF */
#ifndef ZSTD_ALIGNED
/* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. */
# if defined(__GNUC__) || defined(__clang__)
# define ZSTD_ALIGNED(a) __attribute__((aligned(a)))
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
# define ZSTD_ALIGNED(a) _Alignas(a)
#elif defined(_MSC_VER)
# define ZSTD_ALIGNED(n) __declspec(align(n))
# else
/* this compiler will require its own alignment instruction */
# define ZSTD_ALIGNED(...)
# endif
#endif /* ZSTD_ALIGNED */
/*-**************************************************************
* Sanitizer
*****************************************************************/
#if ZSTD_MEMORY_SANITIZER
/**
* Zstd relies on pointer overflow in its decompressor.
* We add this attribute to functions that rely on pointer overflow.
*/
#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
# if __has_attribute(no_sanitize)
# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
/* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
# else
/* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
# endif
# else
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
# endif
#endif
/**
* Helper function to perform a wrapped pointer difference without triggering
* UBSAN.
*
* @returns lhs - rhs with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
{
return lhs - rhs;
}
/**
* Helper function to perform a wrapped pointer add without triggering UBSAN.
*
* @return ptr + add with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
{
return ptr + add;
}
/**
* Helper function to perform a wrapped pointer subtraction without triggering
* UBSAN.
*
* @return ptr - sub with wrapping
*/
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
{
return ptr - sub;
}
/**
* Helper function to add to a pointer that works around C's undefined behavior
* of adding 0 to NULL.
*
* @returns `ptr + add` except it defines `NULL + 0 == NULL`.
*/
MEM_STATIC
unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
{
return add > 0 ? ptr + add : ptr;
}
/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
* abundance of caution, disable our custom poisoning on mingw. */
#ifdef __MINGW32__
#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
#endif
#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
#endif
#endif
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
@@ -292,9 +417,13 @@ void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
/* Print shadow and origin for the memory range to stderr in a human-readable
format. */
void __msan_print_shadow(const volatile void *x, size_t size);
#endif
#if ZSTD_ADDRESS_SANITIZER
#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
+37 -1
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -35,6 +35,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
U32 f7b = 0;
U32 f7c = 0;
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#if !defined(_M_X64) || !defined(__clang__) || __clang_major__ >= 16
int reg[4];
__cpuid((int*)reg, 0);
{
@@ -50,6 +51,41 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
f7c = (U32)reg[2];
}
}
#else
/* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
* which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
* to due to being a reserved register. So in that case, do the `cpuid`
* ourselves. Clang supports inline assembly anyway.
*/
U32 n;
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"popq %%rbx\n\t"
: "=a"(n)
: "a"(0)
: "rcx", "rdx");
if (n >= 1) {
U32 f1a;
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"popq %%rbx\n\t"
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
: "a"(1)
:);
}
if (n >= 7) {
__asm__(
"pushq %%rbx\n\t"
"cpuid\n\t"
"movq %%rbx, %%rax\n\t"
"popq %%rbx"
: "=a"(f7b), "=c"(f7c)
: "a"(7), "c"(0)
: "rdx");
}
#endif
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
/* The following block like the normal cpuid branch below, but gcc
* reserves ebx for use of its pic register so we must specially
+7 -1
View File
@@ -1,7 +1,7 @@
/* ******************************************************************
* debug
* Part of FSE library
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -21,4 +21,10 @@
#include "debug.h"
#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2)
/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
* translation unit is empty. So remove this from Linux kernel builds, but
* otherwise just leave it in.
*/
int g_debuglevel = DEBUGLEVEL;
#endif
+21 -21
View File
@@ -1,7 +1,7 @@
/* ******************************************************************
* debug
* Part of FSE library
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -32,10 +32,6 @@
#ifndef DEBUG_H_12987983217
#define DEBUG_H_12987983217
#if defined (__cplusplus)
extern "C" {
#endif
/* static assert is triggered at compile time, leaving no runtime artefact.
* static assert only works with compile-time constants.
@@ -85,23 +81,27 @@ extern int g_debuglevel; /* the variable is only declared,
It's useful when enabling very verbose levels
on selective conditions (such as position in src) */
# define RAWLOG(l, ...) { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
ZSTD_DEBUG_PRINT(" \n"); \
} }
# define RAWLOG(l, ...) \
do { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__VA_ARGS__); \
} \
} while (0)
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define LINE_AS_STRING TOSTRING(__LINE__)
# define DEBUGLOG(l, ...) \
do { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
ZSTD_DEBUG_PRINT(" \n"); \
} \
} while (0)
#else
# define RAWLOG(l, ...) {} /* disabled */
# define DEBUGLOG(l, ...) {} /* disabled */
#endif
#if defined (__cplusplus)
}
# define RAWLOG(l, ...) do { } while (0) /* disabled */
# define DEBUGLOG(l, ...) do { } while (0) /* disabled */
#endif
#endif /* DEBUG_H_12987983217 */
+12 -40
View File
@@ -1,6 +1,6 @@
/* ******************************************************************
* Common functions of New Generation Entropy library
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -19,8 +19,8 @@
#include "error_private.h" /* ERR_*, ERROR */
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
#include "huf.h"
#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */
/*=== Version ===*/
@@ -38,34 +38,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
static U32 FSE_ctz(U32 val)
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
if (val != 0) {
unsigned long r;
_BitScanForward(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_ctz(val);
# elif defined(__ICCARM__) /* IAR Intrinsic */
return __CTZ(val);
# else /* Software version */
U32 count = 0;
while ((val & 1) == 0) {
val >>= 1;
++count;
}
return count;
# endif
}
}
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
@@ -113,7 +85,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
* repeat.
* Avoid UB by setting the high bit to 1.
*/
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) {
charnum += 3 * 12;
if (LIKELY(ip <= iend-7)) {
@@ -124,7 +96,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
}
charnum += 3 * repeats;
bitStream >>= 2 * repeats;
@@ -189,7 +161,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
* know that threshold > 1.
*/
if (remaining <= 1) break;
nbBits = BIT_highbit32(remaining) + 1;
nbBits = ZSTD_highbit32(remaining) + 1;
threshold = 1 << (nbBits - 1);
}
if (charnum >= maxSV1) break;
@@ -264,7 +236,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
const void* src, size_t srcSize)
{
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
}
FORCE_INLINE_TEMPLATE size_t
@@ -312,14 +284,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
if (weightTotal == 0) return ERROR(corruption_detected);
/* get last non-null symbol weight (implied, total must be 2^n) */
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
{ U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
*tableLogPtr = tableLog;
/* determine last weight */
{ U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1;
U32 const verif = 1 << ZSTD_highbit32(rest);
U32 const lastWeight = ZSTD_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++;
@@ -356,13 +328,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize,
int bmi2)
int flags)
{
#if DYNAMIC_BMI2
if (bmi2) {
if (flags & HUF_flags_bmi2) {
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}
#endif
(void)bmi2;
(void)flags;
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}
+10 -2
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -27,9 +27,11 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(version_unsupported): return "Version not supported";
case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(corruption_detected): return "Data corruption detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
@@ -38,17 +40,23 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
case PREFIX(cannotProduce_uncompressedBlock): return "This mode cannot generate an uncompressed block";
case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
case PREFIX(dictionary_wrong): return "Dictionary mismatch";
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect";
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
case PREFIX(externalSequences_invalid): return "External sequences are not valid";
case PREFIX(maxCode):
default: return notErrorCode;
}
+46 -47
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -13,11 +13,6 @@
#ifndef ERROR_H_MODULE
#define ERROR_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/* ****************************************
* Dependencies
******************************************/
@@ -26,7 +21,6 @@ extern "C" {
#include "debug.h"
#include "zstd_deps.h" /* size_t */
/* ****************************************
* Compiler-specific
******************************************/
@@ -60,8 +54,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
/* check and forward error code */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
#define CHECK_V_F(e, f) \
size_t const e = f; \
do { \
if (ERR_isError(e)) \
return e; \
} while (0)
#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0)
/*-****************************************
@@ -95,10 +94,12 @@ void _force_has_format_string(const char *format, ...) {
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}
#define _FORCE_HAS_FORMAT_STRING(...) \
do { \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
} \
} while (0)
#define ERR_QUOTE(str) #str
@@ -109,51 +110,49 @@ void _force_has_format_string(const char *format, ...) {
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}
#define RETURN_ERROR_IF(cond, err, ...) \
do { \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} \
} while (0)
/**
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0)
/**
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);
#if defined (__cplusplus)
}
#endif
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0)
#endif /* ERROR_H_MODULE */
+8 -100
View File
@@ -1,7 +1,7 @@
/* ******************************************************************
* FSE : Finite State Entropy codec
* Public Prototypes declaration
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -11,11 +11,6 @@
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef FSE_H
#define FSE_H
@@ -25,7 +20,6 @@ extern "C" {
******************************************/
#include "zstd_deps.h" /* size_t, ptrdiff_t */
/*-*****************************************
* FSE_PUBLIC_API : control library symbols visibility
******************************************/
@@ -53,34 +47,6 @@ extern "C" {
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
/*-****************************************
* FSE simple functions
******************************************/
/*! FSE_compress() :
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
@return : size of compressed data (<= dstCapacity).
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
*/
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/*! FSE_decompress():
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
into already allocated destination buffer 'dst', of size 'dstCapacity'.
@return : size of regenerated data (<= maxDstSize),
or an error code, which can be tested using FSE_isError() .
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
Why ? : making this distinction requires a header.
Header management is intentionally delegated to the user layer, which can better manage special cases.
*/
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize);
/*-*****************************************
* Tool functions
******************************************/
@@ -91,20 +57,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
/*-*****************************************
* FSE advanced functions
******************************************/
/*! FSE_compress2() :
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
Both parameters can be defined as '0' to mean : use default value
@return : size of compressed data
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
if FSE_isError(return), it's an error code.
*/
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/*-*****************************************
* FSE detailed API
******************************************/
@@ -164,8 +116,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
/*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
/*! FSE_buildCTable():
Builds `ct`, which must be already allocated, using FSE_createCTable().
@@ -241,23 +191,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize, int bmi2);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
/*! FSE_buildDTable():
Builds 'dt', which must be already allocated, using FSE_createDTable().
return : 0, or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
/*! FSE_decompress_usingDTable():
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
into `dst` which must be already allocated.
@return : size of regenerated data (necessarily <= `dstCapacity`),
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
/*!
Tutorial :
@@ -289,13 +223,11 @@ If there is an error, the function will return an error code, which can be teste
#endif /* FSE_H */
#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
#define FSE_H_FSE_STATIC_LINKING_ONLY
/* *** Dependency *** */
#include "bitstream.h"
/* *****************************************
* Static allocation
*******************************************/
@@ -320,16 +252,6 @@ If there is an error, the function will return an error code, which can be teste
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
*/
#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
@@ -347,19 +269,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
* Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
typedef enum {
FSE_repeat_none, /**< Cannot use the previous table */
@@ -542,20 +456,20 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
const U16* const stateTable = (const U16*)(statePtr->stateTable);
U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
BIT_addBits(bitC, statePtr->value, nbBitsOut);
BIT_addBits(bitC, (BitContainerType)statePtr->value, nbBitsOut);
statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
}
MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
{
BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
BIT_addBits(bitC, (BitContainerType)statePtr->value, statePtr->stateLog);
BIT_flushBits(bitC);
}
/* FSE_getMaxNbBits() :
* Approximate maximum cost of a symbol, in bits.
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
* Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
@@ -708,10 +622,4 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
#endif /* FSE_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif
+28 -116
View File
@@ -1,6 +1,6 @@
/* ******************************************************************
* FSE : Finite State Entropy decoder
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -22,8 +22,8 @@
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"
#include "zstd_deps.h" /* ZSTD_memcpy */
#include "bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@@ -55,19 +55,6 @@
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
/* Function templates */
FSE_DTable* FSE_createDTable (unsigned tableLog)
{
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
}
void FSE_freeDTable (FSE_DTable* dt)
{
ZSTD_free(dt);
}
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
@@ -96,7 +83,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
symbolNext[s] = 1;
} else {
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
symbolNext[s] = normalizedCounter[s];
symbolNext[s] = (U16)normalizedCounter[s];
} } }
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
}
@@ -111,8 +98,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
* our buffer to handle the over-write.
*/
{
U64 const add = 0x0101010101010101ull;
{ U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
U32 s;
@@ -123,14 +109,13 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
pos += n;
}
}
pos += (size_t)n;
} }
/* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the the
* The benefit of doing it in two stages is that we avoid the
* variable size inner loop, which caused lots of branch misses.
* Now we can run through all the positions without any branch misses.
* We unroll the loop twice, since that is what emperically worked best.
* We unroll the loop twice, since that is what empirically worked best.
*/
{
size_t position = 0;
@@ -166,7 +151,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
U32 const nextState = symbolNext[symbol]++;
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
} }
@@ -184,49 +169,6 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi
/*-*******************************************************
* Decompression (Byte symbols)
*********************************************************/
size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
{
void* ptr = dt;
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
void* dPtr = dt + 1;
FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
DTableH->tableLog = 0;
DTableH->fastMode = 0;
cell->newState = 0;
cell->symbol = symbolValue;
cell->nbBits = 0;
return 0;
}
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
{
void* ptr = dt;
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
void* dPtr = dt + 1;
FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSV1 = tableMask+1;
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* Build Decoding Table */
DTableH->tableLog = (U16)nbBits;
DTableH->fastMode = 1;
for (s=0; s<maxSV1; s++) {
dinfo[s].newState = 0;
dinfo[s].symbol = (BYTE)s;
dinfo[s].nbBits = (BYTE)nbBits;
}
return 0;
}
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
void* dst, size_t maxDstSize,
@@ -248,6 +190,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
FSE_initDState(&state1, &bitD, dt);
FSE_initDState(&state2, &bitD, dt);
RETURN_ERROR_IF(BIT_reloadDStream(&bitD)==BIT_DStream_overflow, corruption_detected, "");
#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
/* 4 symbols per loop */
@@ -287,32 +231,12 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
break;
} }
return op-ostart;
}
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize,
const FSE_DTable* dt)
{
const void* ptr = dt;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
}
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
assert(op >= ostart);
return (size_t)(op-ostart);
}
typedef struct {
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
FSE_DTable dtable[1]; /* Dynamically sized */
} FSE_DecompressWksp;
@@ -327,13 +251,18 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
/* correct offset to dtable depends on this property */
FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
/* normal FSE decoding mode */
{
size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
{ size_t const NCountLength =
FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
@@ -342,19 +271,20 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
}
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
{
const void* ptr = wksp->dtable;
const void* ptr = dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
}
}
@@ -382,22 +312,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc,
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
}
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
{
/* Static analyzer seems unable to understand this table will be properly initialized later */
U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
}
#endif
#endif /* FSE_COMMONDEFS_ONLY */
+84 -171
View File
@@ -1,7 +1,7 @@
/* ******************************************************************
* huff0 huffman codec,
* part of Finite State Entropy library
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -12,108 +12,26 @@
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef HUF_H_298734234
#define HUF_H_298734234
/* *** Dependencies *** */
#include "zstd_deps.h" /* size_t */
/* *** library symbols visibility *** */
/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
* HUF symbols remain "private" (internal symbols for library only).
* Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
# define HUF_PUBLIC_API __declspec(dllexport)
#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
#else
# define HUF_PUBLIC_API
#endif
/* ========================== */
/* *** simple functions *** */
/* ========================== */
/** HUF_compress() :
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
* 'dst' buffer must be already allocated.
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
* @return : size of compressed data (<= `dstCapacity`).
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
* if HUF_isError(return), compression failed (more details using HUF_getErrorName())
*/
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/** HUF_decompress() :
* Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
* into already allocated buffer 'dst', of minimum size 'dstSize'.
* `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
* Note : in contrast with FSE, HUF_decompress can regenerate
* RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
* because it knows size to regenerate (originalSize).
* @return : size of regenerated data (== originalSize),
* or an error code, which can be tested using HUF_isError()
*/
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize);
/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
/* Error Management */
HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
/* *** Advanced function *** */
/** HUF_compress2() :
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize);
#endif /* HUF_H_298734234 */
/* ******************************************************************
* WARNING !!
* The following section contains advanced and experimental definitions
* which shall never be used in the context of a dynamic library,
* because they are not guaranteed to remain stable in the future.
* Only consider them in association with static linking.
* *****************************************************************/
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
#define HUF_H_HUF_STATIC_LINKING_ONLY
/* *** Dependencies *** */
#include "mem.h" /* U32 */
#include "mem.h" /* U32 */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
/* Error Management */
unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
/* *** Constants *** */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
@@ -154,25 +72,49 @@ typedef U32 HUF_DTable;
/* ****************************************
* Advanced decompression functions
******************************************/
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
#endif
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
#endif
/**
* Huffman flags bitset.
* For all flags, 0 is the default value.
*/
typedef enum {
/**
* If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
* Otherwise: Ignored.
*/
HUF_flags_bmi2 = (1 << 0),
/**
* If set: Test possible table depths to find the one that produces the smallest header + encoded size.
* If unset: Use heuristic to find the table depth.
*/
HUF_flags_optimalDepth = (1 << 1),
/**
* If set: If the previous table can encode the input, always reuse the previous table.
* If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
*/
HUF_flags_preferRepeat = (1 << 2),
/**
* If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
* If unset: Always histogram the entire input.
*/
HUF_flags_suspectUncompressible = (1 << 3),
/**
* If set: Don't use assembly implementations
* If unset: Allow using assembly implementations
*/
HUF_flags_disableAsm = (1 << 4),
/**
* If set: Don't use the fast decoding loop, always use the fallback decoding loop.
* If unset: Use the fast decoding loop when possible.
*/
HUF_flags_disableFast = (1 << 5)
} HUF_flags_e;
/* ****************************************
* HUF detailed API
* ****************************************/
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
/*! HUF_compress() does the following:
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
@@ -185,12 +127,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* For example, it's possible to compress several blocks using the same 'CTable',
* or to save and regenerate 'CTable' using external methods.
*/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
unsigned HUF_minTableLog(unsigned symbolCardinality);
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@@ -199,6 +141,7 @@ typedef enum {
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat;
/** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.
@@ -209,13 +152,13 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
*/
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree,
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
@@ -241,7 +184,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
int bmi2);
int flags);
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
@@ -249,9 +192,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
/** HUF_getNbBitsFromCTable() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
* Note 1 : is not inlined, as HUF_CElt definition is private */
* Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
* Note 2 : is not inlined, as HUF_CElt definition is private
*/
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
typedef struct {
BYTE tableLog;
BYTE maxSymbolValue;
BYTE unused[sizeof(size_t) - 2];
} HUF_CTableHeader;
/** HUF_readCTableHeader() :
* @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
*/
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
/*
* HUF_decompress() does the following:
* 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
@@ -279,32 +235,12 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
/* ====================== */
/* single stream variants */
/* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
/** HUF_compress1X_repeat() :
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.
@@ -315,50 +251,27 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
#endif
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
#endif
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */
#endif
/* BMI2 variants.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif
#endif /* HUF_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif
#endif /* HUF_H_298734234 */
+46 -66
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,10 +11,6 @@
#ifndef MEM_H_MODULE
#define MEM_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/*-****************************************
* Dependencies
******************************************/
@@ -30,15 +26,8 @@ extern "C" {
#if defined(_MSC_VER) /* Visual Studio */
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
#endif
#if defined(__GNUC__)
# define MEM_STATIC static __inline __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define MEM_STATIC static inline
#elif defined(_MSC_VER)
# define MEM_STATIC static __inline
#else
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#endif
/*-**************************************************************
@@ -83,7 +72,6 @@ extern "C" {
typedef signed long long S64;
#endif
/*-**************************************************************
* Memory I/O API
*****************************************************************/
@@ -133,21 +121,15 @@ MEM_STATIC size_t MEM_swapST(size_t in);
/*-**************************************************************
* Memory I/O Implementation
*****************************************************************/
/* MEM_FORCE_MEMORY_ACCESS :
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
* The below switch allow to select different access method for improved performance.
* Method 0 (default) : use `memcpy()`. Safe and portable.
* Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
* Method 0 : always use `memcpy()`. Safe and portable.
* Method 1 : Use compiler extension to set unaligned access.
* Method 2 : direct access. This method is portable but violate C standard.
* It can generate buggy code on targets depending on alignment.
* In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
* See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
* Prefer these methods in priority order (0 > 1 > 2)
* Default : method 1 if supported, else method 0
*/
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# ifdef __GNUC__
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
@@ -165,10 +147,12 @@ MEM_STATIC unsigned MEM_isLittleEndian(void)
return 1;
#elif defined(__clang__) && __BIG_ENDIAN__
return 0;
#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
#elif defined(_MSC_VER) && (_M_X64 || _M_IX86)
return 1;
#elif defined(__DMC__) && defined(_M_IX86)
return 1;
#elif defined(__IAR_SYSTEMS_ICC__) && __LITTLE_ENDIAN__
return 1;
#else
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
return one.c[0];
@@ -190,30 +174,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
__pragma( pack(push, 1) )
typedef struct { U16 v; } unalign16;
typedef struct { U32 v; } unalign32;
typedef struct { U64 v; } unalign64;
typedef struct { size_t v; } unalignArch;
__pragma( pack(pop) )
#else
typedef struct { U16 v; } __attribute__((packed)) unalign16;
typedef struct { U32 v; } __attribute__((packed)) unalign32;
typedef struct { U64 v; } __attribute__((packed)) unalign64;
typedef struct { size_t v; } __attribute__((packed)) unalignArch;
#endif
typedef __attribute__((aligned(1))) U16 unalign16;
typedef __attribute__((aligned(1))) U32 unalign32;
typedef __attribute__((aligned(1))) U64 unalign64;
typedef __attribute__((aligned(1))) size_t unalignArch;
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; }
MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; }
MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; }
MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; }
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; }
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; }
#else
@@ -257,6 +230,14 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value)
#endif /* MEM_FORCE_MEMORY_ACCESS */
MEM_STATIC U32 MEM_swap32_fallback(U32 in)
{
return ((in << 24) & 0xff000000 ) |
((in << 8) & 0x00ff0000 ) |
((in >> 8) & 0x0000ff00 ) |
((in >> 24) & 0x000000ff );
}
MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER) /* Visual Studio */
@@ -264,14 +245,25 @@ MEM_STATIC U32 MEM_swap32(U32 in)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
return __builtin_bswap32(in);
#elif defined(__ICCARM__)
return __REV(in);
#else
return ((in << 24) & 0xff000000 ) |
((in << 8) & 0x00ff0000 ) |
((in >> 8) & 0x0000ff00 ) |
((in >> 24) & 0x000000ff );
return MEM_swap32_fallback(in);
#endif
}
MEM_STATIC U64 MEM_swap64_fallback(U64 in)
{
return ((in << 56) & 0xff00000000000000ULL) |
((in << 40) & 0x00ff000000000000ULL) |
((in << 24) & 0x0000ff0000000000ULL) |
((in << 8) & 0x000000ff00000000ULL) |
((in >> 8) & 0x00000000ff000000ULL) |
((in >> 24) & 0x0000000000ff0000ULL) |
((in >> 40) & 0x000000000000ff00ULL) |
((in >> 56) & 0x00000000000000ffULL);
}
MEM_STATIC U64 MEM_swap64(U64 in)
{
#if defined(_MSC_VER) /* Visual Studio */
@@ -280,14 +272,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
return __builtin_bswap64(in);
#else
return ((in << 56) & 0xff00000000000000ULL) |
((in << 40) & 0x00ff000000000000ULL) |
((in << 24) & 0x0000ff0000000000ULL) |
((in << 8) & 0x000000ff00000000ULL) |
((in >> 8) & 0x00000000ff000000ULL) |
((in >> 24) & 0x0000000000ff0000ULL) |
((in >> 40) & 0x000000000000ff00ULL) |
((in >> 56) & 0x00000000000000ffULL);
return MEM_swap64_fallback(in);
#endif
}
@@ -434,9 +419,4 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
/* code only tested on 32 and 64 bits systems */
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
#if defined (__cplusplus)
}
#endif
#endif /* MEM_H_MODULE */
+27 -11
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -10,9 +10,9 @@
/* ====== Dependencies ======= */
#include "../common/allocations.h" /* ZSTD_customCalloc, ZSTD_customFree */
#include "zstd_deps.h" /* size_t */
#include "debug.h" /* assert */
#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "pool.h"
/* ====== Compiler specifics ====== */
@@ -96,9 +96,7 @@ static void* POOL_thread(void* opaque) {
/* If the intended queue size was 0, signal after finishing job */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->numThreadsBusy--;
if (ctx->queueSize == 1) {
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
}
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
}
} /* for (;;) */
@@ -128,7 +126,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
* empty and full queues.
*/
ctx->queueSize = queueSize + 1;
ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queueHead = 0;
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
@@ -142,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
}
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threadCapacity = 0;
ctx->customMem = customMem;
/* Check for errors */
@@ -175,7 +173,7 @@ static void POOL_join(POOL_ctx* ctx) {
/* Join all of the threads */
{ size_t i;
for (i = 0; i < ctx->threadCapacity; ++i) {
ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */
} }
}
@@ -190,6 +188,17 @@ void POOL_free(POOL_ctx *ctx) {
ZSTD_customFree(ctx, ctx->customMem);
}
/*! POOL_joinJobs() :
* Waits for all queued jobs to finish executing.
*/
void POOL_joinJobs(POOL_ctx* ctx) {
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
while(!ctx->queueEmpty || ctx->numThreadsBusy > 0) {
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
}
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
}
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
POOL_free (pool);
}
@@ -211,10 +220,10 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
return 0;
}
/* numThreads > threadCapacity */
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
if (!threadPool) return 1;
/* replace existing thread pool */
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t));
ZSTD_customFree(ctx->threads, ctx->customMem);
ctx->threads = threadPool;
/* Initialize additional threads */
@@ -262,7 +271,9 @@ static int isQueueFull(POOL_ctx const* ctx) {
static void
POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
{
POOL_job const job = {function, opaque};
POOL_job job;
job.function = function;
job.opaque = opaque;
assert(ctx != NULL);
if (ctx->shutdown) return;
@@ -330,6 +341,11 @@ void POOL_free(POOL_ctx* ctx) {
(void)ctx;
}
void POOL_joinJobs(POOL_ctx* ctx){
assert(!ctx || ctx == &g_poolCtx);
(void)ctx;
}
int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
(void)ctx; (void)numThreads;
return 0;
+8 -11
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,10 +11,6 @@
#ifndef POOL_H
#define POOL_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_deps.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
@@ -38,10 +34,16 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
*/
void POOL_free(POOL_ctx* ctx);
/*! POOL_joinJobs() :
* Waits for all queued jobs to finish executing.
*/
void POOL_joinJobs(POOL_ctx* ctx);
/*! POOL_resize() :
* Expands or shrinks pool's number of threads.
* This is more efficient than releasing + creating a new context,
* since it tries to preserve and re-use existing threads.
* since it tries to preserve and reuse existing threads.
* `numThreads` must be at least 1.
* @return : 0 when resize was successful,
* !0 (typically 1) if there is an error.
@@ -76,9 +78,4 @@ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
*/
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
#if defined (__cplusplus)
}
#endif
#endif
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -12,7 +12,7 @@
#define ZSTD_PORTABILITY_MACROS_H
/**
* This header file contains macro defintions to support portability.
* This header file contains macro definitions to support portability.
* This header is shared between C and ASM code, so it MUST only
* contain macro definitions. It MUST not contain any C code.
*
@@ -68,30 +68,45 @@
/* Mark the internal assembly functions as hidden */
#ifdef __ELF__
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
#elif defined(__APPLE__)
# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
#else
# define ZSTD_HIDE_ASM_FUNCTION(func)
#endif
/* Compile time determination of BMI2 support */
#ifndef STATIC_BMI2
# if defined(__BMI2__)
# define STATIC_BMI2 1
# elif defined(_MSC_VER) && defined(__AVX2__)
# define STATIC_BMI2 1 /* MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 */
# endif
#endif
#ifndef STATIC_BMI2
# define STATIC_BMI2 0
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
# if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X64)) \
&& (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
# define DYNAMIC_BMI2 1
# else
# define DYNAMIC_BMI2 0
# endif
#endif
/**
* Only enable assembly for GNUC comptabile compilers,
* Only enable assembly for GNU C compatible compilers,
* because other platforms may not support GAS assembly syntax.
*
* Only enable assembly for Linux / MacOS, other platforms may
* Only enable assembly for Linux / MacOS / Win32, other platforms may
* work, but they haven't been tested. This could likely be
* extended to BSD systems.
*
@@ -99,7 +114,7 @@
* 100% of code to be instrumented to work.
*/
#if defined(__GNUC__)
# if defined(__linux__) || defined(__linux) || defined(__APPLE__)
# if defined(__linux__) || defined(__linux) || defined(__APPLE__) || defined(_WIN32)
# if ZSTD_MEMORY_SANITIZER
# define ZSTD_ASM_SUPPORTED 0
# elif ZSTD_DATAFLOW_SANITIZER
@@ -134,4 +149,23 @@
# define ZSTD_ENABLE_ASM_X86_64_BMI2 0
#endif
/*
* For x86 ELF targets, add .note.gnu.property section for Intel CET in
* assembly sources when CET is enabled.
*
* Additionally, any function that may be called indirectly must begin
* with ZSTD_CET_ENDBRANCH.
*/
#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
&& defined(__has_include)
# if __has_include(<cet.h>)
# include <cet.h>
# define ZSTD_CET_ENDBRANCH _CET_ENDBR
# endif
#endif
#ifndef ZSTD_CET_ENDBRANCH
# define ZSTD_CET_ENDBRANCH
#endif
#endif /* ZSTD_PORTABILITY_MACROS_H */
+74 -14
View File
@@ -23,8 +23,7 @@ int g_ZSTD_threading_useless_symbol;
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
/**
* Windows minimalist Pthread Wrapper, based on :
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
* Windows minimalist Pthread Wrapper
*/
@@ -35,37 +34,94 @@ int g_ZSTD_threading_useless_symbol;
/* === Implementation === */
typedef struct {
void* (*start_routine)(void*);
void* arg;
int initialized;
ZSTD_pthread_cond_t initialized_cond;
ZSTD_pthread_mutex_t initialized_mutex;
} ZSTD_thread_params_t;
static unsigned __stdcall worker(void *arg)
{
ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
thread->arg = thread->start_routine(thread->arg);
void* (*start_routine)(void*);
void* thread_arg;
/* Initialized thread_arg and start_routine and signal main thread that we don't need it
* to wait any longer.
*/
{
ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg;
thread_arg = thread_param->arg;
start_routine = thread_param->start_routine;
/* Signal main thread that we are running and do not depend on its memory anymore */
ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex);
thread_param->initialized = 1;
ZSTD_pthread_cond_signal(&thread_param->initialized_cond);
ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex);
}
start_routine(thread_arg);
return 0;
}
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg)
{
ZSTD_thread_params_t thread_param;
(void)unused;
thread->arg = arg;
thread->start_routine = start_routine;
thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
if (!thread->handle)
if (thread==NULL) return -1;
*thread = NULL;
thread_param.start_routine = start_routine;
thread_param.arg = arg;
thread_param.initialized = 0;
/* Setup thread initialization synchronization */
if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) {
/* Should never happen on Windows */
return -1;
}
if(ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) {
/* Should never happen on Windows */
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
return -1;
}
/* Spawn thread */
*thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
if (*thread==NULL) {
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
return errno;
else
return 0;
}
/* Wait for thread to be initialized */
ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex);
while(!thread_param.initialized) {
ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex);
}
ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex);
ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
return 0;
}
int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
int ZSTD_pthread_join(ZSTD_pthread_t thread)
{
DWORD result;
if (!thread.handle) return 0;
if (!thread) return 0;
result = WaitForSingleObject(thread, INFINITE);
CloseHandle(thread);
result = WaitForSingleObject(thread.handle, INFINITE);
switch (result) {
case WAIT_OBJECT_0:
if (value_ptr) *value_ptr = thread.arg;
return 0;
case WAIT_ABANDONED:
return EINVAL;
@@ -83,6 +139,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{
assert(mutex != NULL);
*mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
if (!*mutex)
return 1;
@@ -91,6 +148,7 @@ int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t con
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
{
assert(mutex != NULL);
if (!*mutex)
return 0;
{
@@ -102,6 +160,7 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{
assert(cond != NULL);
*cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
if (!*cond)
return 1;
@@ -110,6 +169,7 @@ int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const*
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
{
assert(cond != NULL);
if (!*cond)
return 0;
{
+5 -18
View File
@@ -16,15 +16,10 @@
#include "debug.h"
#if defined (__cplusplus)
extern "C" {
#endif
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
/**
* Windows minimalist Pthread Wrapper, based on :
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
* Windows minimalist Pthread Wrapper
*/
#ifdef WINVER
# undef WINVER
@@ -62,22 +57,17 @@ extern "C" {
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
/* ZSTD_pthread_create() and ZSTD_pthread_join() */
typedef struct {
HANDLE handle;
void* (*start_routine)(void*);
void* arg;
} ZSTD_pthread_t;
typedef HANDLE ZSTD_pthread_t;
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg);
int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
int ZSTD_pthread_join(ZSTD_pthread_t thread);
/**
* add here more wrappers as required
*/
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>
@@ -99,7 +89,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#define ZSTD_pthread_join(a) pthread_join((a),NULL)
#else /* DEBUGLEVEL >= 1 */
@@ -124,7 +114,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#define ZSTD_pthread_join(a) pthread_join((a),NULL)
#endif
@@ -148,8 +138,5 @@ typedef int ZSTD_pthread_cond_t;
#endif /* ZSTD_MULTITHREAD */
#if defined (__cplusplus)
}
#endif
#endif /* THREADING_H_938743 */
+5 -11
View File
@@ -1,24 +1,18 @@
/*
* xxHash - Fast Hash algorithm
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - xxHash homepage: http://www.xxhash.com
* - xxHash source repository : https://github.com/Cyan4973/xxHash
* xxHash - Extremely Fast Hash algorithm
* Copyright (c) Yann Collet - Meta Platforms, Inc
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
*/
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"
File diff suppressed because it is too large Load Diff
+1 -36
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,6 @@
* Dependencies
***************************************/
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "error_private.h"
#include "zstd_internal.h"
@@ -47,37 +46,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
/*! ZSTD_getErrorString() :
* provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
/*=**************************************************************
* Custom allocator
****************************************************************/
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return ZSTD_malloc(size);
}
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
ZSTD_memset(ptr, 0, size);
return ptr;
}
return ZSTD_calloc(1, size);
}
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
ZSTD_free(ptr);
}
}
+13 -1
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -24,6 +24,18 @@
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
/* Even though we use qsort_r only for the dictionary builder, the macro
* _GNU_SOURCE has to be declared *before* the inclusion of any standard
* header and the script 'combine.sh' combines the whole zstd source code
* in a single file.
*/
#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) || \
defined(__CYGWIN__) || defined(__MSYS__)
#if !defined(_GNU_SOURCE) && !defined(__ANDROID__) /* NDK doesn't ship qsort_r(). */
#define _GNU_SOURCE
#endif
#endif
#include <limits.h>
#include <stddef.h>
#include <string.h>
+13 -182
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,6 @@
#include "../zstd.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#ifndef XXH_STATIC_LINKING_ONLY
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
@@ -40,10 +39,6 @@
# define ZSTD_TRACE 0
#endif
#if defined (__cplusplus)
extern "C" {
#endif
/* ---- static assert (debug) --- */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
#define ZSTD_isError ERR_isError /* for inlining */
@@ -93,16 +88,17 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define ZSTD_FRAMECHECKSUMSIZE 4
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
#define MIN_LITERALS_FOR_4_STREAMS 6
#define HufLog 12
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
typedef enum { set_basic, set_rle, set_compressed, set_repeat } SymbolEncodingType_e;
#define LONGNBSEQ 0x7F00
#define MINMATCH 3
#define Litbits 8
#define LitHufLog 11
#define MaxLit ((1<<Litbits) - 1)
#define MaxML 52
#define MaxLL 35
@@ -113,6 +109,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define LLFSELog 9
#define OffFSELog 8
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
#define MaxMLBits 16
#define MaxLLBits 16
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
/* Each table cannot take more than #symbols * FSELog bits */
@@ -176,7 +174,7 @@ static void ZSTD_copy8(void* dst, const void* src) {
ZSTD_memcpy(dst, src, 8);
#endif
}
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
/* Need to use memmove here since the literal buffer can now be located within
the dst buffer. In circumstances where the op "catches up" to where the
@@ -196,7 +194,7 @@ static void ZSTD_copy16(void* dst, const void* src) {
ZSTD_memcpy(dst, copy16_buf, 16);
#endif
}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16
@@ -225,7 +223,7 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
/* Handle short offset copies. */
do {
COPY8(op, ip)
COPY8(op, ip);
} while (op < oend);
} else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
@@ -235,12 +233,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
* one COPY16() in the first call. Then, do two calls per loop since
* at that point it is more likely to have a high trip count.
*/
#ifdef __aarch64__
do {
COPY16(op, ip);
}
while (op < oend);
#else
ZSTD_copy16(op, ip);
if (16 >= length) return;
op += 16;
@@ -250,7 +242,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
COPY16(op, ip);
}
while (op < oend);
#endif
}
}
@@ -283,62 +274,6 @@ typedef enum {
/*-*******************************************
* Private declarations
*********************************************/
typedef struct seqDef_s {
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
U16 litLength;
U16 mlBase; /* mlBase == matchLength - MINMATCH */
} seqDef;
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
typedef enum {
ZSTD_llt_none = 0, /* no longLengthType */
ZSTD_llt_literalLength = 1, /* represents a long literal */
ZSTD_llt_matchLength = 2 /* represents a long match */
} ZSTD_longLengthType_e;
typedef struct {
seqDef* sequencesStart;
seqDef* sequences; /* ptr to end of sequences */
BYTE* litStart;
BYTE* lit; /* ptr to end of literals */
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
* the existing value of the litLength or matchLength by 0x10000.
*/
ZSTD_longLengthType_e longLengthType;
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} seqStore_t;
typedef struct {
U32 litLength;
U32 matchLength;
} ZSTD_sequenceLength;
/**
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
*/
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
{
ZSTD_sequenceLength seqLen;
seqLen.litLength = seq->litLength;
seqLen.matchLength = seq->mlBase + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
seqLen.litLength += 0xFFFF;
}
if (seqStore->longLengthType == ZSTD_llt_matchLength) {
seqLen.matchLength += 0xFFFF;
}
}
return seqLen;
}
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
@@ -347,111 +282,11 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
*/
typedef struct {
size_t nbBlocks;
size_t compressedSize;
unsigned long long decompressedBound;
} ZSTD_frameSizeInfo; /* decompress & legacy */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/* custom memory allocation functions */
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
# if STATIC_BMI2 == 1
return _lzcnt_u32(val)^31;
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
# endif
}
}
/**
* Counts the number of trailing zeros of a `size_t`.
* Most compilers should support CTZ as a builtin. A backup
* implementation is provided if the builtin isn't supported, but
* it may not be terribly efficient.
*/
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
{
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, (U64)val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return __builtin_ctzll((U64)val);
# else
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 56,
5, 17, 26, 54, 15, 41, 29, 43,
10, 31, 38, 35, 21, 45, 49, 57,
63, 6, 12, 18, 24, 27, 33, 55,
16, 53, 40, 42, 30, 37, 44, 48,
62, 11, 23, 32, 52, 39, 36, 47,
61, 22, 51, 46, 60, 50, 59, 58 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanForward(&r, (U32)val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return __builtin_ctz((U32)val);
# else
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
}
/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
* Note : only works with regular variant;
@@ -467,13 +302,13 @@ typedef struct {
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
/* Used by: decompress, fullbench (does not get its definition from here) */
/* Used by: decompress, fullbench */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
blockProperties_t* bpPtr);
/*! ZSTD_decodeSeqHeaders() :
* decode sequence header from src */
/* Used by: decompress, fullbench (does not get its definition from here) */
/* Used by: zstd_decompress_block, fullbench */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize);
@@ -486,8 +321,4 @@ MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CCOMMON_H_MODULE */
+6 -13
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,23 +11,20 @@
#ifndef ZSTD_TRACE_H
#define ZSTD_TRACE_H
#if defined (__cplusplus)
extern "C" {
#endif
#include <stddef.h>
/* weak symbol support
* For now, enable conservatively:
* - Only GNUC
* - Only ELF
* - Only x86-64 and i386
* - Only x86-64, i386, aarch64 and risc-v.
* Also, explicitly disable on platforms known not to work so they aren't
* forgotten in the future.
*/
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
defined(__GNUC__) && defined(__ELF__) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86) || defined(__aarch64__) || defined(__riscv)) && \
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
!defined(__CYGWIN__) && !defined(_AIX)
# define ZSTD_HAVE_WEAK_SYMBOLS 1
@@ -64,7 +61,7 @@ typedef struct {
/**
* Non-zero if streaming (de)compression is used.
*/
unsigned streaming;
int streaming;
/**
* The dictionary ID.
*/
@@ -73,7 +70,7 @@ typedef struct {
* Is the dictionary cold?
* Only set on decompression.
*/
unsigned dictionaryIsCold;
int dictionaryIsCold;
/**
* The dictionary size or zero if no dictionary.
*/
@@ -156,8 +153,4 @@ ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
#endif /* ZSTD_TRACE */
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_TRACE_H */
+1 -1
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
+15 -131
View File
@@ -1,6 +1,6 @@
/* ******************************************************************
* FSE : Finite State Entropy encoder
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -25,7 +25,8 @@
#include "../common/error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@@ -90,7 +91,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table :
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
* https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
@@ -191,7 +192,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
break;
default :
assert(normalizedCounter[s] > 1);
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
@@ -224,8 +225,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ 4 /* bitCount initialized at 4 */
+ 2 /* first two symbols may use one additional bit each */) / 8)
+ 1 /* round up to whole nb bytes */
+ 2 /* additional two bytes for bitstream flush */;
+ 1 /* round up to whole nb bytes */
+ 2 /* additional two bytes for bitstream flush */;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
@@ -254,7 +255,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
/* Init */
remaining = tableSize+1; /* +1 for extra accuracy */
threshold = tableSize;
nbBits = tableLog+1;
nbBits = (int)tableLog+1;
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) {
@@ -273,7 +274,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
}
while (symbol >= start+3) {
start+=3;
bitStream += 3 << bitCount;
bitStream += 3U << bitCount;
bitCount += 2;
}
bitStream += (symbol-start) << bitCount;
@@ -293,7 +294,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
count++; /* +1 for extra accuracy */
if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
bitStream += count << bitCount;
bitStream += (U32)count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
previousIs0 = (count==1);
@@ -321,7 +322,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
return (out-ostart);
assert(out >= ostart);
return (size_t)(out-ostart);
}
@@ -342,21 +344,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
* FSE Compression Code
****************************************************************/
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
size_t size;
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)ZSTD_malloc(size);
}
void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
@@ -364,7 +356,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */
@@ -532,40 +524,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
return tableLog;
}
/* fake FSE_CTable, for raw (uncompressed) input */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSymbolValue = tableMask;
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* header */
tableU16[-2] = (U16) nbBits;
tableU16[-1] = (U16) maxSymbolValue;
/* Build table */
for (s=0; s<tableSize; s++)
tableU16[s] = (U16)(tableSize + s);
/* Build Symbol Transformation Table */
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
for (s=0; s<=maxSymbolValue; s++) {
symbolTT[s].deltaNbBits = deltaNbBits;
symbolTT[s].deltaFindState = s-1;
} }
return 0;
}
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
@@ -664,78 +622,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
*/
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
FSE_CTable* CTable = (FSE_CTable*)workSpace;
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
void* scratchBuffer = (void*)(CTable + CTableSize);
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
/* init conditions */
if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
if (srcSize <= 1) return 0; /* Not compressible */
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
}
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
/* Write table description header */
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
op += nc_err;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
/* check compressibility */
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
return op-ostart;
}
typedef struct {
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
union {
U32 hist_wksp[HIST_WKSP_SIZE_U32];
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
} workspace;
} fseWkspMax_t;
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
fseWkspMax_t scratchBuffer;
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}
#endif
#endif /* FSE_COMMONDEFS_ONLY */
+11 -1
View File
@@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -26,6 +26,16 @@ unsigned HIST_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
* Histogram functions
****************************************************************/
void HIST_add(unsigned* count, const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
while (ip<end) {
count[*ip++]++;
}
}
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
+8 -1
View File
@@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -73,3 +73,10 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);
/*! HIST_add() :
* Lowest level: just add nb of occurrences of characters from @src into @count.
* @count is not reset. @count array is presumed large enough (i.e. 1 KB).
@ This function does not need any additional stack memory.
*/
void HIST_add(unsigned* count, const void* src, size_t srcSize);
+283 -189
View File
@@ -1,6 +1,6 @@
/* ******************************************************************
* Huffman encoder, part of New Generation Entropy library
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -29,9 +29,9 @@
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "../common/fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@@ -42,13 +42,67 @@
/* **************************************************************
* Utils
* Required declarations
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showU32(const U32* arr, size_t size)
{
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", arr[u]); (void)arr;
}
RAWLOG(6, " \n");
return size;
}
static size_t HUF_getNbBits(HUF_CElt elt);
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
}
RAWLOG(6, " \n");
return size;
}
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
}
RAWLOG(6, " \n");
return size;
}
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
}
RAWLOG(6, " \n");
return size;
}
#endif
/* *******************************************************
* HUF : Huffman block compression
@@ -89,7 +143,10 @@ typedef struct {
S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;
static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
static size_t
HUF_compressWeights(void* dst, size_t dstSize,
const void* weightTable, size_t wtSize,
void* workspace, size_t workspaceSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
@@ -140,7 +197,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt)
static size_t HUF_getValue(HUF_CElt elt)
{
return elt & ~0xFF;
return elt & ~(size_t)0xFF;
}
static size_t HUF_getValueFast(HUF_CElt elt)
@@ -163,6 +220,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value)
}
}
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
{
HUF_CTableHeader header;
ZSTD_memcpy(&header, ctable, sizeof(header));
return header;
}
static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
{
HUF_CTableHeader header;
HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
ZSTD_memset(&header, 0, sizeof(header));
assert(tableLog < 256);
header.tableLog = (BYTE)tableLog;
assert(maxSymbolValue < 256);
header.maxSymbolValue = (BYTE)maxSymbolValue;
ZSTD_memcpy(ctable, &header, sizeof(header));
}
typedef struct {
HUF_CompressWeightsWksp wksp;
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
@@ -178,6 +254,11 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@@ -207,16 +288,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
return ((maxSymbolValue+1)/2) + 1;
}
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
HUF_WriteCTableWksp wksp;
return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
@@ -234,7 +305,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
CTable[0] = tableLog;
*maxSymbolValuePtr = nbSymbols - 1;
HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
/* Prepare base value per rank */
{ U32 n, nextRankStart = 0;
@@ -266,74 +339,71 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
}
*maxSymbolValuePtr = nbSymbols - 1;
return readSize;
}
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
const HUF_CElt* ct = CTable + 1;
const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
return 0;
return (U32)HUF_getNbBits(ct[symbolValue]);
}
typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/**
* HUF_setMaxHeight():
* Enforces maxNbBits on the Huffman tree described in huffNode.
* Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
*
* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
* the tree to so that it is a valid canonical Huffman tree.
* It attempts to convert all nodes with nbBits > @targetNbBits
* to employ @targetNbBits instead. Then it adjusts the tree
* so that it remains a valid canonical Huffman tree.
*
* @pre The sum of the ranks of each symbol == 2^largestBits,
* where largestBits == huffNode[lastNonNull].nbBits.
* @post The sum of the ranks of each symbol == 2^largestBits,
* where largestBits is the return value <= maxNbBits.
* where largestBits is the return value (expected <= targetNbBits).
*
* @param huffNode The Huffman tree modified in place to enforce maxNbBits.
* @param huffNode The Huffman tree modified in place to enforce targetNbBits.
* It's presumed sorted, from most frequent to rarest symbol.
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
* @param maxNbBits The maximum allowed number of bits, which the Huffman tree
* @param targetNbBits The allowed number of bits, which the Huffman tree
* may not respect. After this function the Huffman tree will
* respect maxNbBits.
* @return The maximum number of bits of the Huffman tree after adjustment,
* necessarily no more than maxNbBits.
* respect targetNbBits.
* @return The maximum number of bits of the Huffman tree after adjustment.
*/
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{
const U32 largestBits = huffNode[lastNonNull].nbBits;
/* early exit : no elt > maxNbBits, so the tree is already valid. */
if (largestBits <= maxNbBits) return largestBits;
/* early exit : no elt > targetNbBits, so the tree is already valid. */
if (largestBits <= targetNbBits) return largestBits;
DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
/* there are several too large elements (at least >= 2) */
{ int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits);
const U32 baseCost = 1 << (largestBits - targetNbBits);
int n = (int)lastNonNull;
/* Adjust any ranks > maxNbBits to maxNbBits.
/* Adjust any ranks > targetNbBits to targetNbBits.
* Compute totalCost, which is how far the sum of the ranks is
* we are over 2^largestBits after adjust the offending ranks.
*/
while (huffNode[n].nbBits > maxNbBits) {
while (huffNode[n].nbBits > targetNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
huffNode[n].nbBits = (BYTE)maxNbBits;
huffNode[n].nbBits = (BYTE)targetNbBits;
n--;
}
/* n stops at huffNode[n].nbBits <= maxNbBits */
assert(huffNode[n].nbBits <= maxNbBits);
/* n end at index of smallest symbol using < maxNbBits */
while (huffNode[n].nbBits == maxNbBits) --n;
/* n stops at huffNode[n].nbBits <= targetNbBits */
assert(huffNode[n].nbBits <= targetNbBits);
/* n end at index of smallest symbol using < targetNbBits */
while (huffNode[n].nbBits == targetNbBits) --n;
/* renorm totalCost from 2^largestBits to 2^maxNbBits
/* renorm totalCost from 2^largestBits to 2^targetNbBits
* note : totalCost is necessarily a multiple of baseCost */
assert((totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - maxNbBits);
assert(((U32)totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - targetNbBits);
assert(totalCost > 0);
/* repay normalized cost */
@@ -342,19 +412,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits;
{ U32 currentNbBits = targetNbBits;
int pos;
for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
rankLast[maxNbBits-currentNbBits] = (U32)pos;
currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
rankLast[targetNbBits-currentNbBits] = (U32)pos;
} }
while (totalCost > 0) {
/* Try to reduce the next power of 2 above totalCost because we
* gain back half the rank.
*/
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 const highPos = rankLast[nBitsToDecrease];
U32 const lowPos = rankLast[nBitsToDecrease-1];
@@ -394,7 +464,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
rankLast[nBitsToDecrease] = noSymbol;
else {
rankLast[nBitsToDecrease]--;
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
}
} /* while (totalCost > 0) */
@@ -406,11 +476,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
* TODO.
*/
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
/* special case : no rank 1 symbol (using maxNbBits-1);
* let's create one from largest rank 0 (using maxNbBits).
/* special case : no rank 1 symbol (using targetNbBits-1);
* let's create one from largest rank 0 (using targetNbBits).
*/
if (rankLast[1] == noSymbol) {
while (huffNode[n].nbBits == maxNbBits) n--;
while (huffNode[n].nbBits == targetNbBits) n--;
huffNode[n+1].nbBits--;
assert(n >= 0);
rankLast[1] = (U32)(n+1);
@@ -424,7 +494,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
} /* repay normalized cost */
} /* there are several too large elements (at least >= 2) */
return maxNbBits;
return targetNbBits;
}
typedef struct {
@@ -432,7 +502,7 @@ typedef struct {
U16 curr;
} rankPos;
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192
@@ -451,8 +521,8 @@ typedef struct {
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
*/
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
/* Return the appropriate bucket index for a given count. See definition of
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
@@ -460,7 +530,7 @@ typedef struct {
static U32 HUF_getIndex(U32 const count) {
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
? count
: BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
/* Helper swap function for HUF_quickSortPartition() */
@@ -583,7 +653,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
/* Sort each bucket. */
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
U32 const bucketStartIdx = rankPosition[n].base;
if (bucketSize > 1) {
assert(bucketStartIdx < maxSymbolValue1);
@@ -594,6 +664,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
assert(HUF_isSorted(huffNode, maxSymbolValue1));
}
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
@@ -614,6 +685,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
int lowS, lowN;
int nodeNb = STARTNODE;
int n, nodeRoot;
DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
/* init for parents */
nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -640,6 +712,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
for (n=0; n<=nonNullRank; n++)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
return nonNullRank;
}
@@ -674,31 +748,40 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
for (n=0; n<alphabetSize; n++)
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
CTable[0] = maxNbBits;
HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
}
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
void* workSpace, size_t wkspSize)
{
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
HUF_buildCTable_wksp_tables* const wksp_tables =
(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1;
int nonNullRank;
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
/* safety checks */
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
return ERROR(workSpace_tooSmall);
return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
return ERROR(maxSymbolValue_tooLarge);
return ERROR(maxSymbolValue_tooLarge);
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
/* build tree */
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
/* enforce maxTableLog */
/* determine and enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
@@ -719,13 +802,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
}
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
HUF_CElt const* ct = CTable + 1;
int bad = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
}
return !bad;
HUF_CTableHeader header = HUF_readCTableHeader(CTable);
HUF_CElt const* ct = CTable + 1;
int bad = 0;
int s;
assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
if (header.maxSymbolValue < maxSymbolValue)
return 0;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
}
return !bad;
}
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
@@ -807,7 +897,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
#if DEBUGLEVEL >= 1
{
size_t const nbBits = HUF_getNbBits(elt);
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
(void)dirtyBits;
/* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
@@ -887,7 +977,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
size_t const nbBits = bitC->bitPos[0] & 0xFF;
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
}
}
@@ -967,17 +1057,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
U32 const tableLog = (U32)CTable[0];
U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
HUF_CElt const* ct = CTable + 1;
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
HUF_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
{ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
{ BYTE* op = ostart;
size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
@@ -1048,9 +1138,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2)
const HUF_CElt* CTable, const int flags)
{
if (bmi2) {
if (flags & HUF_flags_bmi2) {
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
}
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@@ -1061,28 +1151,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2)
const HUF_CElt* CTable, const int flags)
{
(void)bmi2;
(void)flags;
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}
#endif
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable, int bmi2)
const HUF_CElt* CTable, int flags)
{
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src;
@@ -1096,7 +1181,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
op += 6; /* jumpTable */
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
@@ -1104,7 +1189,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
@@ -1112,7 +1197,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
@@ -1121,7 +1206,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
assert(ip <= iend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0;
op += cSize;
}
@@ -1129,14 +1214,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
return (size_t)(op-ostart);
}
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -1144,11 +1224,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize,
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{
size_t const cSize = (nbStreams==HUF_singleStream) ?
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
@@ -1171,6 +1251,81 @@ typedef struct {
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{
unsigned cardinality = 0;
unsigned i;
for (i = 0; i < maxSymbolValue + 1; i++) {
if (count[i] != 0) cardinality += 1;
}
return cardinality;
}
unsigned HUF_minTableLog(unsigned symbolCardinality)
{
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
return minBitsSymbols;
}
unsigned HUF_optimalTableLog(
unsigned maxTableLog,
size_t srcSize,
unsigned maxSymbolValue,
void* workSpace, size_t wkspSize,
HUF_CElt* table,
const unsigned* count,
int flags)
{
assert(srcSize > 1); /* Not supported, RLE should be used instead */
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
if (!(flags & HUF_flags_optimalDepth)) {
/* cheap evaluation, based on FSE */
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
size_t hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
size_t optSize = ((size_t) ~0) - 1;
unsigned optLog = maxTableLog, optLogGuess;
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
/* Search until size increases */
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
{ size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
if (ERR_isError(maxBits)) continue;
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
}
if (ERR_isError(hSize)) continue;
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
if (newSize > optSize + 1) {
break;
}
if (newSize < optSize) {
optSize = newSize;
optLog = optLogGuess;
}
}
assert(optLog <= HUF_TABLELOG_MAX);
return optLog;
}
}
/* HUF_compress_internal() :
* `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
@@ -1180,14 +1335,14 @@ HUF_compress_internal (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
const int bmi2, unsigned suspectUncompressible)
HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
/* checks & inits */
@@ -1201,16 +1356,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
/* Heuristic : If old table is valid, use it for small inputs */
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, oldHufTable, bmi2);
nbStreams, oldHufTable, flags);
}
/* If uncompressible data is suspected, do a smaller sampling first */
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
size_t largestTotal = 0;
DEBUGLOG(5, "input suspected incompressible : sampling to check");
{ unsigned maxSymbolValueBegin = maxSymbolValue;
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
largestTotal += largestBegin;
@@ -1227,6 +1383,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
}
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
/* Check validity of previous table */
if ( repeat
@@ -1235,25 +1392,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
*repeat = HUF_repeat_none;
}
/* Heuristic : use existing table for small inputs */
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, oldHufTable, bmi2);
nbStreams, oldHufTable, flags);
}
/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
}
/* Zero unused symbols in CTable, so we can check it for validity */
{
size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
}
/* Write table description header */
@@ -1266,7 +1418,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, oldHufTable, bmi2);
nbStreams, oldHufTable, flags);
} }
/* Use the new huffman table */
@@ -1278,93 +1430,35 @@ HUF_compress_internal (void* dst, size_t dstSize,
}
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
nbStreams, table->CTable, bmi2);
}
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
nbStreams, table->CTable, flags);
}
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
int bmi2, unsigned suspectUncompressible)
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize, hufTable,
repeat, preferRepeat, bmi2, suspectUncompressible);
}
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
repeat, flags);
}
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* consider skipping quickly
* re-use an existing huffman compression table */
* reuse an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
hufTable, repeat, flags);
}
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/** HUF_buildCTable() :
* @return : maxNbBits
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
HUF_buildCTable_wksp_tables workspace;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}
size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress2 (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
#endif
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -13,11 +13,36 @@
***************************************/
#include "zstd_compress_literals.h"
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showHexa(const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*)src;
size_t u;
for (u=0; u<srcSize; u++) {
RAWLOG(5, " %02X", ip[u]); (void)ip;
}
RAWLOG(5, " \n");
return srcSize;
}
#endif
/* **************************************************************
* Literals compression - special cases
****************************************************************/
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
switch(flSize)
@@ -36,16 +61,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
}
ZSTD_memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
return srcSize + flSize;
}
static int allBytesIdentical(const void* src, size_t srcSize)
{
assert(srcSize >= 1);
assert(src != NULL);
{ const BYTE b = ((const BYTE*)src)[0];
size_t p;
for (p=1; p<srcSize; p++) {
if (((const BYTE*)src)[p] != b) return 0;
}
return 1;
}
}
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
assert(dstCapacity >= 4); (void)dstCapacity;
assert(allBytesIdentical(src, srcSize));
switch(flSize)
{
@@ -63,28 +102,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
}
ostart[flSize] = *(const BYTE*)src;
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
return flSize+1;
}
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2,
unsigned suspectUncompressible)
/* ZSTD_minLiteralsToCompress() :
* returns minimal amount of literals
* for literal compression to even be attempted.
* Minimum is made tighter as compression strategy increases.
*/
static size_t
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
{
assert((int)strategy >= 0);
assert((int)strategy <= 9);
/* btultra2 : min 8 bytes;
* then 2x larger for each successive compression strategy
* max threshold 64 bytes */
{ int const shift = MIN(9-(int)strategy, 3);
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
return mintc;
}
}
size_t ZSTD_compressLiterals (
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy,
int disableLiteralCompression,
int suspectUncompressible,
int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed;
SymbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
disableLiteralCompression, (U32)srcSize);
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
disableLiteralCompression, (U32)srcSize, dstCapacity);
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
/* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
@@ -92,40 +154,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
/* if too small, don't even attempt compression (speed opt) */
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
int const flags = 0
| (bmi2 ? HUF_flags_bmi2 : 0)
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ?
HUF_compress1X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
HUF_compress4X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
src, srcSize,
HUF_SYMBOLVALUE_MAX, LitHufLog,
entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable,
&repeat, flags);
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table");
DEBUGLOG(5, "reusing statistics from previous huffman block");
hType = set_repeat;
}
}
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
{ size_t const minGain = ZSTD_minGain(srcSize, strategy);
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
} }
if (cLitSize==1) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
/* A return value of 1 signals that the alphabet consists of a single symbol.
* However, in some rare circumstances, it could be the compressed size (a single byte).
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
* (it's also necessary to not generate statistics).
* Therefore, in such a case, actively check that all bytes are identical. */
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
} }
if (hType == set_compressed) {
/* using a newly constructed table */
@@ -136,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -16,16 +16,24 @@
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* ZSTD_compressRleLiteralsBlock() :
* Conditions :
* - All bytes in @src are identical
* - dstCapacity >= 4 */
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
/* ZSTD_compressLiterals():
* @entropyWorkspace: must be aligned on 4-bytes boundaries
* @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
* @suspectUncompressible: sampling checks, to potentially skip huffman coding
*/
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2,
unsigned suspectUncompressible);
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
int suspectUncompressible,
int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -58,7 +58,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{
/* Heuristic: This should cover most blocks <= 16K and
* start to fade out after 16K to about 32K depending on
* comprssibility.
* compressibility.
*/
return nbSeq >= 2048;
}
@@ -153,20 +153,20 @@ size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
return cost >> 8;
}
symbolEncodingType_e
SymbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_DefaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols,
/* Prefer set_basic over set_rle when there are 2 or fewer symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/
@@ -241,7 +241,7 @@ typedef struct {
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@@ -293,7 +293,7 @@ ZSTD_encodeSequences_body(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
BIT_CStream_t blockStream;
FSE_CState_t stateMatchLength;
@@ -387,7 +387,7 @@ ZSTD_encodeSequences_default(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
@@ -405,7 +405,7 @@ ZSTD_encodeSequences_bmi2(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
SeqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
@@ -421,7 +421,7 @@ size_t ZSTD_encodeSequences(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,26 +11,27 @@
#ifndef ZSTD_COMPRESS_SEQUENCES_H
#define ZSTD_COMPRESS_SEQUENCES_H
#include "zstd_compress_internal.h" /* SeqDef */
#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
#include "../common/zstd_internal.h" /* SymbolEncodingType_e, ZSTD_strategy */
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
} ZSTD_DefaultPolicy_e;
symbolEncodingType_e
SymbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_DefaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy);
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@@ -42,7 +43,7 @@ size_t ZSTD_encodeSequences(
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -36,13 +36,14 @@
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
* @return : compressed size of literals section of a sub-block
* Or 0 if it unable to compress.
* Or 0 if unable to compress.
* Or error code */
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
const BYTE* literals, size_t litSize,
void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
static size_t
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
const BYTE* literals, size_t litSize,
void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
{
size_t const header = writeEntropy ? 200 : 0;
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@@ -50,11 +51,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart + lhSize;
U32 const singleStream = lhSize == 3;
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
SymbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0;
(void)bmi2; /* TODO bmi2... */
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
*entropyWritten = 0;
@@ -76,9 +75,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
}
/* TODO bmi2 */
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
: HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
op += cSize;
cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) {
@@ -103,7 +102,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
@@ -123,26 +122,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
}
*entropyWritten = 1;
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
return op-ostart;
return (size_t)(op-ostart);
}
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
const seqDef* const sstart = sequences;
const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart;
static size_t
ZSTD_seqDecompressedSize(SeqStore_t const* seqStore,
const SeqDef* sequences, size_t nbSeqs,
size_t litSize, int lastSubBlock)
{
size_t matchLengthSum = 0;
size_t litLengthSum = 0;
(void)(litLengthSum); /* suppress unused variable warning on some environments */
while (send-sp > 0) {
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
size_t n;
for (n=0; n<nbSeqs; n++) {
const ZSTD_SequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
litLengthSum += seqLen.litLength;
matchLengthSum += seqLen.matchLength;
sp++;
}
assert(litLengthSum <= litSize);
if (!lastSequence) {
DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
(unsigned)nbSeqs, (const void*)sequences,
(unsigned)litLengthSum, (unsigned)matchLengthSum);
if (!lastSubBlock)
assert(litLengthSum == litSize);
}
else
assert(litLengthSum <= litSize);
(void)litLengthSum;
return matchLengthSum + litSize;
}
@@ -156,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
* @return : compressed size of sequences section of a sub-block
* Or 0 if it is unable to compress
* Or error code. */
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const seqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
static size_t
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const SeqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
BYTE* const ostart = (BYTE*)dst;
@@ -176,14 +180,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall, "");
if (nbSeq < 0x7F)
if (nbSeq < 128)
*op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ)
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) {
return op - ostart;
return (size_t)(op - ostart);
}
/* seqHead : flags for FSE encoding type */
@@ -205,7 +209,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
}
{ size_t const bitstreamSize = ZSTD_encodeSequences(
op, oend - op,
op, (size_t)(oend - op),
fseTables->matchlengthCTable, mlCode,
fseTables->offcodeCTable, ofCode,
fseTables->litlengthCTable, llCode,
@@ -249,7 +253,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
#endif
*entropyWritten = 1;
return op - ostart;
return (size_t)(op - ostart);
}
/** ZSTD_compressSubBlock() :
@@ -258,7 +262,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
* Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
const seqDef* sequences, size_t nbSeq,
const SeqDef* sequences, size_t nbSeq,
const BYTE* literals, size_t litSize,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
@@ -275,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
&entropyMetadata->hufMetadata, literals, litSize,
op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
op, (size_t)(oend-op),
bmi2, writeLitEntropy, litEntropyWritten);
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
if (cLitSize == 0) return 0;
op += cLitSize;
@@ -285,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
sequences, nbSeq,
llCode, mlCode, ofCode,
cctxParams,
op, oend-op,
op, (size_t)(oend-op),
bmi2, writeSeqEntropy, seqEntropyWritten);
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
if (cSeqSize == 0) return 0;
op += cSeqSize;
}
/* Write block header */
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
{ size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(ostart, cBlockHeader24);
}
return op-ostart;
return (size_t)(op-ostart);
}
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@@ -322,7 +327,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
return 0;
}
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
static size_t ZSTD_estimateSubBlockSize_symbolType(SymbolEncodingType_e type,
const BYTE* codeTable, unsigned maxCode,
size_t nbSeq, const FSE_CTable* fseCTable,
const U8* additionalBits,
@@ -385,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
typedef struct {
size_t estLitSize;
size_t estBlockSize;
} EstimatedBlockSize;
static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable,
const BYTE* llCodeTable,
const BYTE* mlCodeTable,
@@ -393,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize,
int writeLitEntropy, int writeSeqEntropy) {
size_t cSizeEstimate = 0;
cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
&entropy->huf, &entropyMetadata->hufMetadata,
workspace, wkspSize, writeLitEntropy);
cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
int writeLitEntropy, int writeSeqEntropy)
{
EstimatedBlockSize ebs;
ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
&entropy->huf, &entropyMetadata->hufMetadata,
workspace, wkspSize, writeLitEntropy);
ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
workspace, wkspSize, writeSeqEntropy);
return cSizeEstimate + ZSTD_blockHeaderSize;
ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
return ebs;
}
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@@ -415,14 +426,57 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
return 0;
}
static size_t countLiterals(SeqStore_t const* seqStore, const SeqDef* sp, size_t seqCount)
{
size_t n, total = 0;
assert(sp != NULL);
for (n=0; n<seqCount; n++) {
total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
}
DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
return total;
}
#define BYTESCALE 256
static size_t sizeBlockSequences(const SeqDef* sp, size_t nbSeqs,
size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
int firstSubBlock)
{
size_t n, budget = 0, inSize=0;
/* entropy headers */
size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
assert(firstSubBlock==0 || firstSubBlock==1);
budget += headerSize;
/* first sequence => at least one sequence*/
budget += sp[0].litLength * avgLitCost + avgSeqCost;
if (budget > targetBudget) return 1;
inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
/* loop over sequences */
for (n=1; n<nbSeqs; n++) {
size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
budget += currentCost;
inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
/* stop when sub-block budget is reached */
if ( (budget > targetBudget)
/* though continue to expand until the sub-block is deemed compressible */
&& (budget < inSize * BYTESCALE) )
break;
}
return n;
}
/** ZSTD_compressSubBlock_multi() :
* Breaks super-block into multiple sub-blocks and compresses them.
* Entropy will be written to the first block.
* The following blocks will use repeat mode to compress.
* All sub-blocks are compressed blocks (no raw or rle blocks).
* @return : compressed size of the super block (which is multiple ZSTD blocks)
* Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
* Entropy will be written into the first block.
* The following blocks use repeat_mode to compress.
* Sub-blocks are all compressed, except the last one when beneficial.
* @return : compressed size of the super block (which features multiple ZSTD blocks)
* or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const SeqStore_t* seqStorePtr,
const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
@@ -432,12 +486,14 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const int bmi2, U32 lastBlock,
void* workspace, size_t wkspSize)
{
const seqDef* const sstart = seqStorePtr->sequencesStart;
const seqDef* const send = seqStorePtr->sequences;
const seqDef* sp = sstart;
const SeqDef* const sstart = seqStorePtr->sequencesStart;
const SeqDef* const send = seqStorePtr->sequences;
const SeqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
size_t const nbSeqs = (size_t)(send - sstart);
const BYTE* const lstart = seqStorePtr->litStart;
const BYTE* const lend = seqStorePtr->lit;
const BYTE* lp = lstart;
size_t const nbLiterals = (size_t)(lend - lstart);
BYTE const* ip = (BYTE const*)src;
BYTE const* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*)dst;
@@ -446,112 +502,171 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const BYTE* llCodePtr = seqStorePtr->llCode;
const BYTE* mlCodePtr = seqStorePtr->mlCode;
const BYTE* ofCodePtr = seqStorePtr->ofCode;
size_t targetCBlockSize = cctxParams->targetCBlockSize;
size_t litSize, seqCount;
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
int writeSeqEntropy = 1;
int lastSequence = 0;
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
(unsigned)(lend-lp), (unsigned)(send-sstart));
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
(unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
litSize = 0;
seqCount = 0;
do {
size_t cBlockSizeEstimate = 0;
if (sstart == send) {
lastSequence = 1;
} else {
const seqDef* const sequence = sp + seqCount;
lastSequence = sequence == send - 1;
litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
seqCount++;
/* let's start by a general estimation for the full block */
if (nbSeqs > 0) {
EstimatedBlockSize const ebs =
ZSTD_estimateSubBlockSize(lp, nbLiterals,
ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
&nextCBlock->entropy, entropyMetadata,
workspace, wkspSize,
writeLitEntropy, writeSeqEntropy);
/* quick estimation */
size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
size_t n, avgBlockBudget, blockBudgetSupp=0;
avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
(unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
(unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
/* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
* this will result in the production of a single uncompressed block covering @srcSize.*/
if (ebs.estBlockSize > srcSize) return 0;
/* compress and write sub-blocks */
assert(nbSubBlocks>0);
for (n=0; n < nbSubBlocks-1; n++) {
/* determine nb of sequences for current sub-block + nbLiterals from next sequence */
size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
/* if reached last sequence : break to last sub-block (simplification) */
assert(seqCount <= (size_t)(send-sp));
if (sp + seqCount == send) break;
assert(seqCount > 0);
/* compress sub-block */
{ int litEntropyWritten = 0;
int seqEntropyWritten = 0;
size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
const size_t decompressedSize =
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, (size_t)(oend-op),
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
0);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
/* check compressibility, update state components */
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
(unsigned)decompressedSize, (unsigned)cSize);
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
sp += seqCount;
blockBudgetSupp = 0;
} }
/* otherwise : do not compress yet, coalesce current sub-block with following one */
}
if (lastSequence) {
assert(lp <= lend);
assert(litSize <= (size_t)(lend - lp));
litSize = (size_t)(lend - lp);
}
/* I think there is an optimization opportunity here.
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
* since it recalculates estimate from scratch.
* For example, it would recount literal distribution and symbol codes every time.
*/
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
&nextCBlock->entropy, entropyMetadata,
workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
int litEntropyWritten = 0;
int seqEntropyWritten = 0;
const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, oend-op,
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
lastBlock && lastSequence);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Committed the sub-block");
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
sp += seqCount;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
litSize = 0;
seqCount = 0;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
} /* if (nbSeqs > 0) */
/* write last block */
DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
{ int litEntropyWritten = 0;
int seqEntropyWritten = 0;
size_t litSize = (size_t)(lend - lp);
size_t seqCount = (size_t)(send - sp);
const size_t decompressedSize =
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, (size_t)(oend-op),
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
lastBlock);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
/* update pointers, the nb of literals borrowed from next sequence must be preserved */
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
(unsigned)decompressedSize, (unsigned)cSize);
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
sp += seqCount;
}
} while (!lastSequence);
}
if (writeLitEntropy) {
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
DEBUGLOG(5, "Literal entropy tables were never written");
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
}
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
/* If we haven't written our entropy tables, then we've violated our contract and
* must emit an uncompressed block.
*/
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
return 0;
}
if (ip < iend) {
size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
/* some data left : last part of the block sent uncompressed */
size_t const rSize = (size_t)((iend - ip));
size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
assert(cSize != 0);
op += cSize;
/* We have to regenerate the repcodes because we've skipped some sequences */
if (sp < send) {
seqDef const* seq;
repcodes_t rep;
const SeqDef* seq;
Repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
}
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}
}
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
return op-ostart;
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
(unsigned)(op-ostart));
return (size_t)(op-ostart);
}
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
void const* src, size_t srcSize,
unsigned lastBlock) {
const void* src, size_t srcSize,
unsigned lastBlock)
{
ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
@@ -559,7 +674,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
&entropyMetadata,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */), "");
return ZSTD_compressSubBlock_multi(&zc->seqStore,
zc->blockState.prevCBlock,
@@ -569,5 +684,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
dst, dstCapacity,
src, srcSize,
zc->bmi2, lastBlock,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */);
}
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
+184 -95
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -14,11 +14,10 @@
/*-*************************************
* Dependencies
***************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h"
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/portability_macros.h"
#include "../common/compiler.h" /* ZS2_isPower2 */
/*-*************************************
* Constants
@@ -44,8 +43,9 @@ extern "C" {
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers,
ZSTD_cwksp_alloc_aligned
ZSTD_cwksp_alloc_aligned_init_once,
ZSTD_cwksp_alloc_aligned,
ZSTD_cwksp_alloc_buffers
} ZSTD_cwksp_alloc_phase_e;
/**
@@ -98,8 +98,8 @@ typedef enum {
*
* Workspace Layout:
*
* [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
* [ ... workspace ... ]
* [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
@@ -123,9 +123,18 @@ typedef enum {
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used. These
* buffers are each aligned to 64 bytes.
* - Init once: these buffers require to be initialized at least once before
* use. They should be used when we want to skip memory initialization
* while not triggering memory checkers (like Valgrind) when reading from
* from this memory without writing to it first.
* These buffers should be used carefully as they might contain data
* from previous compressions.
* Buffers are aligned to 64 bytes.
*
* - Aligned: these buffers don't require any initialization before they're
* used. The user of the buffer should make sure they write into a buffer
* location before reading from it.
* Buffers are aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@@ -137,8 +146,9 @@ typedef enum {
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
* 2. Buffers
* 3. Aligned/Tables
* 2. Init once / Tables
* 3. Aligned / Tables
* 4. Buffers / Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
@@ -150,6 +160,7 @@ typedef struct {
void* tableEnd;
void* tableValidEnd;
void* allocStart;
void* initOnceStart;
BYTE allocFailed;
int workspaceOversizedDuration;
@@ -162,6 +173,7 @@ typedef struct {
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
@@ -171,14 +183,29 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
assert(ws->workspace <= ws->initOnceStart);
#if ZSTD_MEMORY_SANITIZER
{
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
(void)offset;
#if defined(ZSTD_MSAN_PRINT)
if(offset!=-1) {
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
}
#endif
assert(offset==-1);
};
#endif
}
/**
* Align must be a power of 2.
*/
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t align) {
size_t const mask = align - 1;
assert((align & mask) == 0);
assert(ZSTD_isPower2(align));
return (size + mask) & ~mask;
}
@@ -192,7 +219,7 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned64_alloc_size().
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
if (size == 0)
@@ -204,12 +231,16 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#endif
}
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size, size_t alignment) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, alignment));
}
/**
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given "aligned".
*/
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
MEM_STATIC size_t ZSTD_cwksp_aligned64_alloc_size(size_t size) {
return ZSTD_cwksp_aligned_alloc_size(size, ZSTD_CWKSP_ALIGNMENT_BYTES);
}
/**
@@ -217,14 +248,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
* for internal purposes (currently only alignment).
*/
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes
* to align the beginning of the aligned section.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.
/* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
* bytes to align the beginning of tables section and end of buffers;
*/
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
return slackSpace;
}
@@ -236,11 +263,23 @@ MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
assert(ZSTD_isPower2(alignBytes));
assert(bytes < alignBytes);
return bytes;
}
/**
* Returns the initial value for allocStart which is used to determine the position from
* which we can allocate from the end of the workspace.
*/
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws)
{
char* endPtr = (char*)ws->workspaceEnd;
assert(ZSTD_isPower2(ZSTD_CWKSP_ALIGNMENT_BYTES));
endPtr = endPtr - ((size_t)endPtr % ZSTD_CWKSP_ALIGNMENT_BYTES);
return (void*)endPtr;
}
/**
* Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
@@ -253,7 +292,7 @@ ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
{
void* const alloc = (BYTE*)ws->allocStart - bytes;
void* const bottom = ws->tableEnd;
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
DEBUGLOG(5, "cwksp: reserving [0x%p]:%zd bytes; %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
@@ -281,27 +320,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
{
assert(phase >= ws->phase);
if (phase > ws->phase) {
/* Going from allocating objects to allocating buffers */
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
/* Going from allocating objects to allocating initOnce / tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
phase >= ZSTD_cwksp_alloc_aligned_init_once) {
ws->tableValidEnd = ws->objectEnd;
}
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
/* Going from allocating buffers to allocating aligneds/tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
size_t const bytesToAlign =
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
memory_allocation, "aligned phase - alignment initial allocation failed!");
}
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
void* const alloc = ws->objectEnd;
void *const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* const objectEnd = (BYTE*)alloc + bytesToAlign;
void *const objectEnd = (BYTE *) alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!");
@@ -309,7 +337,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
ws->tableEnd = objectEnd; /* table area starts being empty */
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
} } }
}
}
}
ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws);
}
@@ -321,7 +351,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
{
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
}
/**
@@ -348,7 +378,9 @@ ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase
if (alloc) {
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
/* We need to keep the redzone poisoned while unpoisoning the bytes that
* are actually allocated. */
__asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE);
}
}
#endif
@@ -366,29 +398,64 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
/**
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
* This memory has been initialized at least once in the past.
* This doesn't mean it has been initialized this time, and it might contain data from previous
* operations.
* The main usage is for algorithms that might need read access into uninitialized memory.
* The algorithm must maintain safety under these conditions and must make sure it doesn't
* leak any of the past data (directly or in side channels).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
{
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
if(ptr && ptr < ws->initOnceStart) {
/* We assume the memory following the current allocation is either:
* 1. Not usable as initOnce memory (end of workspace)
* 2. Another initOnce buffer that has been allocated before (and so was previously memset)
* 3. An ASAN redzone, in which case we don't want to write on it
* For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
* Note that we assume here that MSAN and ASAN cannot run in the same time. */
ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
ws->initOnceStart = ptr;
}
#if ZSTD_MEMORY_SANITIZER
assert(__msan_test_shadow(ptr, bytes) == -1);
#endif
return ptr;
}
/**
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned64(ZSTD_cwksp* ws, size_t bytes)
{
void* const ptr = ZSTD_cwksp_reserve_internal(ws,
ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
return ptr;
}
/**
* Aligned on 64 bytes. These buffers have the special property that
* their values remain constrained, allowing us to re-use them without
* their values remain constrained, allowing us to reuse them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
{
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
void* alloc;
void* end;
void* top;
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
/* We can only start allocating tables after we are done reserving space for objects at the
* start of the workspace */
if(ws->phase < phase) {
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
}
}
alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes;
@@ -413,7 +480,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
#endif
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
return alloc;
}
@@ -459,19 +526,41 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
return alloc;
}
/**
* with alignment control
* Note : should happen only once, at workspace first initialization
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object_aligned(ZSTD_cwksp* ws, size_t byteSize, size_t alignment)
{
size_t const mask = alignment - 1;
size_t const surplus = (alignment > sizeof(void*)) ? alignment - sizeof(void*) : 0;
void* const start = ZSTD_cwksp_reserve_object(ws, byteSize + surplus);
if (start == NULL) return NULL;
if (surplus == 0) return start;
assert(ZSTD_isPower2(alignment));
return (void*)(((size_t)start + surplus) & ~mask);
}
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
{
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
/* To validate that the table reuse logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. */
* space every time we mark it dirty.
* Since tableValidEnd space and initOnce space may overlap we don't poison
* the initOnce portion as it break its promise. This means that this poisoning
* check isn't always applied fully. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
__msan_poison(ws->objectEnd, size);
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
__msan_poison(ws->objectEnd, size);
} else {
assert(ws->initOnceStart >= ws->objectEnd);
__msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
}
}
#endif
@@ -499,7 +588,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
}
ZSTD_cwksp_mark_tables_clean(ws);
}
@@ -508,7 +597,8 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
* Invalidates table allocations.
* All other allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws)
{
DEBUGLOG(4, "cwksp: clearing tables!");
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
@@ -534,13 +624,16 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!");
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
/* To validate that the context reuse logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
* the workspace (or at least the non-static, non-table parts of it)
* every time we start a new compression. */
* the workspace except for the areas in which we expect memory reuse
* without initialization (objects, valid tables area and init once
* memory). */
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
}
}
#endif
@@ -556,14 +649,23 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
#endif
ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd;
ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
ws->phase = ZSTD_cwksp_alloc_buffers;
if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
}
/**
* The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed
@@ -576,6 +678,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws);
@@ -594,6 +697,11 @@ MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
if (ptr != NULL && customMem.customFree != NULL) {
__msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws));
}
#endif
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_customFree(ptr, customMem);
}
@@ -607,15 +715,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed;
}
@@ -628,17 +727,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used.
*/
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
size_t const estimatedSpace, int resizedWorkspace) {
if (resizedWorkspace) {
/* Resized/newly allocated wksp should have exact bounds */
return ZSTD_cwksp_used(ws) == estimatedSpace;
} else {
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
}
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
/* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
* the alignment bytes difference between estimation and actual usage */
return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
ZSTD_cwksp_used(ws) <= estimatedSpace;
}
@@ -669,8 +762,4 @@ MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
}
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CWKSP_H */
+163 -81
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,8 +11,49 @@
#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashLarge = ms->hashTable;
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
U32* const hashSmall = ms->chainTable;
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
/* Always insert every fastHashFillStep position into the hash tables.
* Insert the other positions into the large hash table if their entry
* is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
if (i == 0) {
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
}
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
}
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
}
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -43,13 +84,26 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
} }
}
void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
{
if (tfp == ZSTD_tfp_forCDict) {
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
} else {
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
}
}
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls /* template */)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
@@ -67,7 +121,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t mLength;
U32 offset;
@@ -88,9 +142,14 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
const BYTE* matchl0; /* the long match for ip */
const BYTE* matchs0; /* the short match for ip */
const BYTE* matchl1; /* the long match for ip1 */
const BYTE* matchs0_safe; /* matchs0 or safe address */
const BYTE* ip = istart; /* the current position */
const BYTE* ip1; /* the next position */
/* Array of ~random data, should have low probability of matching data
* we load from here instead of from tables, if matchl0/matchl1 are
* invalid indices. Used to avoid unpredictable branches. */
const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
@@ -100,8 +159,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
}
/* Outer Loop: one iteration per match found and stored */
@@ -131,30 +190,35 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored;
}
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
if (idxl0 > prefixLowestIndex) {
/* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch.
* However expression below complies into conditional move. Since
* match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
* if there is a match, all branches become predictable. */
{ const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
/* check prefix long match */
if (MEM_read64(matchl0) == MEM_read64(ip)) {
if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
offset = (U32)(ip-matchl0);
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
goto _match_found;
}
}
} }
idxl1 = hashLong[hl1];
matchl1 = base + idxl1;
if (idxs0 > prefixLowestIndex) {
/* check prefix short match */
if (MEM_read32(matchs0) == MEM_read32(ip)) {
goto _search_next_long;
}
/* Same optimization as matchl0 above */
matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]);
/* check prefix short match */
if(MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) {
goto _search_next_long;
}
if (ip1 >= nextStep) {
@@ -175,30 +239,36 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
} while (ip1 <= ilimit);
_cleanup:
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */
return (size_t)(iend - anchor);
_search_next_long:
/* check prefix long +1 match */
if (idxl1 > prefixLowestIndex) {
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
/* short match found: let's check for a longer one */
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
offset = (U32)(ip - matchs0);
/* check long match at +1 position */
if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) {
size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8;
if (l1len > mLength) {
/* use the long match instead */
ip = ip1;
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
mLength = l1len;
offset = (U32)(ip-matchl1);
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
goto _match_found;
matchs0 = matchl1;
}
}
/* if no long +1 match, explore the short match we found */
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
offset = (U32)(ip - matchs0);
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */
/* fall-through */
@@ -217,7 +287,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
hashLong[hl1] = (U32)(ip1 - base);
}
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored:
/* match found */
@@ -243,7 +313,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -254,8 +324,9 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */)
{
@@ -275,9 +346,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
const U32* const dictHashLong = dms->hashTable;
const U32* const dictHashSmall = dms->chainTable;
@@ -286,8 +356,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
const U32 dictHBitsL = dictCParams->hashLog;
const U32 dictHBitsS = dictCParams->chainLog;
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
@@ -295,6 +365,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
/* if a dictionary is attached, it must be within window range */
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes);
PREFETCH_AREA(dictHashSmall, chainTableBytes);
}
/* init */
ip += (dictAndPrefixLength == 0);
@@ -309,8 +386,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
U32 offset;
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
U32 const curr = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2];
U32 matchIndexS = hashSmall[h];
@@ -323,26 +404,24 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
/* check repcode */
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored;
}
if (matchIndexL > prefixLowestIndex) {
if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
/* check prefix long match */
if (MEM_read64(matchLong) == MEM_read64(ip)) {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
}
} else {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
} else if (dictTagsMatchL) {
/* check dictMatchState long match */
U32 const dictMatchIndexL = dictHashLong[dictHL];
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
assert(dictMatchL < dictEnd);
@@ -354,13 +433,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
} }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
/* short match candidate */
if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
} else {
} else if (dictTagsMatchS) {
/* check dictMatchState short match */
U32 const dictMatchIndexS = dictHashSmall[dictHS];
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
match = dictBase + dictMatchIndexS;
matchIndexS = dictMatchIndexS + dictIndexDelta;
@@ -375,25 +454,24 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
continue;
_search_next_long:
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = curr + 1;
/* check prefix long +1 match */
if (matchIndexL3 > prefixLowestIndex) {
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
}
} else {
if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
} else if (dictTagsMatchL3) {
/* check dict long +1 match */
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
assert(dictMatchL3 < dictEnd);
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
@@ -419,7 +497,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored:
/* match found */
@@ -443,12 +521,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@@ -461,8 +539,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
} /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1;
rep[1] = offset_2;
/* Return the last literals size */
return (size_t)(iend - anchor);
@@ -470,7 +548,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
@@ -488,7 +566,7 @@ ZSTD_GEN_DFAST_FN(dictMatchState, 7)
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
const U32 mls = ms->cParams.minMatch;
@@ -508,7 +586,7 @@ size_t ZSTD_compressBlock_doubleFast(
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
const U32 mls = ms->cParams.minMatch;
@@ -527,8 +605,10 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
}
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls /* template */)
{
@@ -579,13 +659,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
size_t mLength;
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -596,7 +676,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -621,7 +701,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -647,13 +727,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
& (offset_2 <= current2 - dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@@ -677,7 +757,7 @@ ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;
@@ -694,3 +774,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,28 +11,32 @@
#ifndef ZSTD_DOUBLE_FAST_H
#define ZSTD_DOUBLE_FAST_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#endif
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
#else
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
#endif /* ZSTD_DOUBLE_FAST_H */
File diff suppressed because it is too large Load Diff
+7 -14
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,27 +11,20 @@
#ifndef ZSTD_FAST_H
#define ZSTD_FAST_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_FAST_H */
File diff suppressed because it is too large Load Diff
+141 -73
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,10 +11,6 @@
#ifndef ZSTD_LAZY_H
#define ZSTD_LAZY_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h"
/**
@@ -25,101 +21,173 @@ extern "C" {
*/
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
#endif
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_GREEDY NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_LAZY NULL
#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_lazy2(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
#else
#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
#endif
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btlazy2(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
#else
#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
#endif
#endif /* ZSTD_LAZY_H */
+62 -41
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -16,7 +16,7 @@
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
#include "zstd_ldm_geartab.h"
#define LDM_BUCKET_SIZE_LOG 3
#define LDM_BUCKET_SIZE_LOG 4
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
@@ -133,21 +133,35 @@ static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
}
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams)
const ZSTD_compressionParameters* cParams)
{
params->windowLog = cParams->windowLog;
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
}
if (params->hashRateLog == 0) {
params->hashRateLog = params->windowLog < params->hashLog
? 0
: params->windowLog - params->hashLog;
if (params->hashLog > 0) {
/* if params->hashLog is set, derive hashRateLog from it */
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
if (params->windowLog > params->hashLog) {
params->hashRateLog = params->windowLog - params->hashLog;
}
} else {
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
/* mapping from [fast, rate7] to [btultra2, rate4] */
params->hashRateLog = 7 - (cParams->strategy/3);
}
}
if (params->hashLog == 0) {
params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX);
}
if (params->minMatchLength == 0) {
params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (cParams->strategy >= ZSTD_btultra)
params->minMatchLength /= 2;
}
if (params->bucketSizeLog==0) {
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX);
}
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
@@ -170,22 +184,22 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
/** ZSTD_ldm_getBucket() :
* Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
const ldmState_t* ldmState, size_t hash, U32 const bucketSizeLog)
{
return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
return ldmState->hashTable + (hash << bucketSizeLog);
}
/** ZSTD_ldm_insertEntry() :
* Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
size_t const hash, const ldmEntry_t entry,
ldmParams_t const ldmParams)
U32 const bucketSizeLog)
{
BYTE* const pOffset = ldmState->bucketOffsets + hash;
unsigned const offset = *pOffset;
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
*pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
*(ZSTD_ldm_getBucket(ldmState, hash, bucketSizeLog) + offset) = entry;
*pOffset = (BYTE)((offset + 1) & ((1u << bucketSizeLog) - 1));
}
@@ -234,7 +248,7 @@ static size_t ZSTD_ldm_countBackwardsMatch_2segments(
*
* The tables for the other strategies are filled within their
* block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
static size_t ZSTD_ldm_fillFastTables(ZSTD_MatchState_t* ms,
void const* end)
{
const BYTE* const iend = (const BYTE*)end;
@@ -242,11 +256,15 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
switch(ms->cParams.strategy)
{
case ZSTD_fast:
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break;
case ZSTD_dfast:
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
#else
assert(0); /* shouldn't be called: cparams should've been adjusted. */
#endif
break;
case ZSTD_greedy:
@@ -269,7 +287,8 @@ void ZSTD_ldm_fillHashTable(
const BYTE* iend, ldmParams_t const* params)
{
U32 const minMatchLength = params->minMatchLength;
U32 const hBits = params->hashLog - params->bucketSizeLog;
U32 const bucketSizeLog = params->bucketSizeLog;
U32 const hBits = params->hashLog - bucketSizeLog;
BYTE const* const base = ldmState->window.base;
BYTE const* const istart = ip;
ldmRollingHashState_t hashState;
@@ -284,7 +303,7 @@ void ZSTD_ldm_fillHashTable(
unsigned n;
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
hashed = ZSTD_ldm_gear_feed(&hashState, ip, (size_t)(iend - ip), splits, &numSplits);
for (n = 0; n < numSplits; n++) {
if (ip + splits[n] >= istart + minMatchLength) {
@@ -295,7 +314,7 @@ void ZSTD_ldm_fillHashTable(
entry.offset = (U32)(split - base);
entry.checksum = (U32)(xxhash >> 32);
ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, entry, params->bucketSizeLog);
}
}
@@ -309,7 +328,7 @@ void ZSTD_ldm_fillHashTable(
* Sets cctx->nextToUpdate to a position corresponding closer to anchor
* if it is far way
* (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
static void ZSTD_ldm_limitTableUpdate(ZSTD_MatchState_t* ms, const BYTE* anchor)
{
U32 const curr = (U32)(anchor - ms->window.base);
if (curr > ms->nextToUpdate + 1024) {
@@ -318,8 +337,10 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
}
}
static size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, RawSeqStore_t* rawSeqStore,
ldmParams_t const* params, void const* src, size_t srcSize)
{
/* LDM parameters */
@@ -373,7 +394,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
candidates[n].split = split;
candidates[n].hash = hash;
candidates[n].checksum = (U32)(xxhash >> 32);
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, params->bucketSizeLog);
PREFETCH_L1(candidates[n].bucket);
}
@@ -396,7 +417,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
* the previous one, we merely register it in the hash table and
* move on */
if (split < anchor) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
continue;
}
@@ -443,7 +464,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
/* No match found -- insert an entry into the hash table
* and process the next candidate match */
if (bestEntry == NULL) {
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
continue;
}
@@ -464,7 +485,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
/* Insert the current entry into the hash table --- it must be
* done after the previous block to avoid clobbering bestEntry */
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog);
anchor = split + forwardMatchLength;
@@ -503,7 +524,7 @@ static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
}
size_t ZSTD_ldm_generateSequences(
ldmState_t* ldmState, rawSeqStore_t* sequences,
ldmState_t* ldmState, RawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize)
{
U32 const maxDist = 1U << params->windowLog;
@@ -549,7 +570,7 @@ size_t ZSTD_ldm_generateSequences(
* the window through early invalidation.
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
* offset against maxDist directly.
*
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
* that any offset used is valid at the END of the sequence, since it may
@@ -580,7 +601,7 @@ size_t ZSTD_ldm_generateSequences(
}
void
ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
{
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
@@ -616,7 +637,7 @@ ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const min
* Returns the current sequence to handle, or if the rest of the block should
* be literals, it returns a sequence with offset == 0.
*/
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
static rawSeq maybeSplitSequence(RawSeqStore_t* rawSeqStore,
U32 const remaining, U32 const minMatch)
{
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
@@ -640,7 +661,7 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
return sequence;
}
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) {
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
@@ -657,14 +678,14 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
}
}
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_paramSwitch_e useRowMatchFinder,
size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_ParamSwitch_e useRowMatchFinder,
void const* src, size_t srcSize)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
unsigned const minMatch = cParams->minMatch;
ZSTD_blockCompressor const blockCompressor =
ZSTD_BlockCompressor_f const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
@@ -689,7 +710,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* maybeSplitSequence updates rawSeqStore->pos */
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
(U32)(iend - ip), minMatch);
int i;
/* End signal */
if (sequence.offset == 0)
break;
@@ -702,6 +722,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* Run the block compressor */
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{
int i;
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
ip += sequence.litLength;
@@ -711,7 +732,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[0] = sequence.offset;
/* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
STORE_OFFSET(sequence.offset),
OFFSET_TO_OFFBASE(sequence.offset),
sequence.matchLength);
ip += sequence.matchLength;
}
+7 -15
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,10 +11,6 @@
#ifndef ZSTD_LDM_H
#define ZSTD_LDM_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h" /* ldmParams_t, U32 */
#include "../zstd.h" /* ZSTD_CCtx, size_t */
@@ -43,7 +39,7 @@ void ZSTD_ldm_fillHashTable(
* sequences.
*/
size_t ZSTD_ldm_generateSequences(
ldmState_t* ldms, rawSeqStore_t* sequences,
ldmState_t* ldms, RawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize);
/**
@@ -64,9 +60,9 @@ size_t ZSTD_ldm_generateSequences(
* two. We handle that case correctly, and update `rawSeqStore` appropriately.
* NOTE: This function does not return any errors.
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_paramSwitch_e useRowMatchFinder,
size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore,
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_ParamSwitch_e useRowMatchFinder,
void const* src, size_t srcSize);
/**
@@ -76,7 +72,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
* Avoids emitting matches less than `minMatch` bytes.
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
void ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize,
U32 const minMatch);
/* ZSTD_ldm_skipRawSeqStoreBytes():
@@ -84,7 +80,7 @@ void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
* Not to be used in conjunction with ZSTD_ldm_skipSequences().
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes);
/** ZSTD_ldm_getTableSize() :
* Estimate the space needed for long distance matching tables or 0 if LDM is
@@ -110,8 +106,4 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_FAST_H */
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
File diff suppressed because it is too large Load Diff
+40 -24
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,46 +11,62 @@
#ifndef ZSTD_OPT_H
#define ZSTD_OPT_H
#if defined (__cplusplus)
extern "C" {
#endif
#include "zstd_compress_internal.h"
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
/* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend);
#endif
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
#else
#define ZSTD_COMPRESSBLOCK_BTOPT NULL
#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
#endif
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */
size_t ZSTD_compressBlock_btultra2(
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}
#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
#else
#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
#endif
#endif /* ZSTD_OPT_H */
@@ -0,0 +1,238 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "../common/compiler.h" /* ZSTD_ALIGNOF */
#include "../common/mem.h" /* S64 */
#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
#include "hist.h" /* HIST_add */
#include "zstd_preSplit.h"
#define BLOCKSIZE_MIN 3500
#define THRESHOLD_PENALTY_RATE 16
#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)
#define THRESHOLD_PENALTY 3
#define HASHLENGTH 2
#define HASHLOG_MAX 10
#define HASHTABLESIZE (1 << HASHLOG_MAX)
#define HASHMASK (HASHTABLESIZE - 1)
#define KNUTH 0x9e3779b9
/* for hashLog > 8, hash 2 bytes.
* for hashLog == 8, just take the byte, no hashing.
* The speed of this method relies on compile-time constant propagation */
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
{
assert(hashLog >= 8);
if (hashLog == 8) return (U32)((const BYTE*)p)[0];
assert(hashLog <= HASHLOG_MAX);
return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
}
typedef struct {
unsigned events[HASHTABLESIZE];
size_t nbEvents;
} Fingerprint;
typedef struct {
Fingerprint pastEvents;
Fingerprint newEvents;
} FPStats;
static void initStats(FPStats* fpstats)
{
ZSTD_memset(fpstats, 0, sizeof(FPStats));
}
FORCE_INLINE_TEMPLATE void
addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
const char* p = (const char*)src;
size_t limit = srcSize - HASHLENGTH + 1;
size_t n;
assert(srcSize >= HASHLENGTH);
for (n = 0; n < limit; n+=samplingRate) {
fp->events[hash2(p+n, hashLog)]++;
}
fp->nbEvents += limit/samplingRate;
}
FORCE_INLINE_TEMPLATE void
recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
fp->nbEvents = 0;
addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
}
typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
{ \
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
}
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
{
U64 distance = 0;
size_t n;
assert(hashLog <= HASHLOG_MAX);
for (n = 0; n < ((size_t)1 << hashLog); n++) {
distance +=
abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
}
return distance;
}
/* Compare newEvents with pastEvents
* return 1 when considered "too different"
*/
static int compareFingerprints(const Fingerprint* ref,
const Fingerprint* newfp,
int penalty,
unsigned hashLog)
{
assert(ref->nbEvents > 0);
assert(newfp->nbEvents > 0);
{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
U64 deviation = fpDistance(ref, newfp, hashLog);
U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
return deviation >= threshold;
}
}
static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
acc->events[n] += newfp->events[n];
}
acc->nbEvents += newfp->nbEvents;
}
static void flushEvents(FPStats* fpstats)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
fpstats->pastEvents.events[n] = fpstats->newEvents.events[n];
}
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents;
ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));
}
static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
{
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
assert(acc->events[n] >= slice->events[n]);
acc->events[n] -= slice->events[n];
}
acc->nbEvents -= slice->nbEvents;
}
#define CHUNKSIZE (8 << 10)
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
static const RecordEvents_f records_fs[] = {
FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
};
static const unsigned hashParams[] = { 8, 9, 10, 10 };
const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
FPStats* const fpstats = (FPStats*)workspace;
const char* p = (const char*)blockStart;
int penalty = THRESHOLD_PENALTY;
size_t pos = 0;
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
initStats(fpstats);
record_f(&fpstats->pastEvents, p, CHUNKSIZE);
for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
return pos;
} else {
mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
if (penalty > 0) penalty--;
}
}
assert(pos == blockSize);
return blockSize;
(void)flushEvents; (void)removeEvents;
}
/* ZSTD_splitBlock_fromBorders(): very fast strategy :
* compare fingerprint from beginning and end of the block,
* derive from their difference if it's preferable to split in the middle,
* repeat the process a second time, for finer grained decision.
* 3 times did not brought improvements, so I stopped at 2.
* Benefits are good enough for a cheap heuristic.
* More accurate splitting saves more, but speed impact is also more perceptible.
* For better accuracy, use more elaborate variant *_byChunks.
*/
static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
void* workspace, size_t wkspSize)
{
#define SEGMENT_SIZE 512
FPStats* const fpstats = (FPStats*)workspace;
Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
initStats(fpstats);
HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
return blockSize;
HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
middleEvents->nbEvents = SEGMENT_SIZE;
{ U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
return 64 KB;
return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
}
}
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
assert(0<=level && level<=4);
if (level == 0)
return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
/* level >= 1*/
return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
}
@@ -0,0 +1,33 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_PRESPLIT_H
#define ZSTD_PRESPLIT_H
#include <stddef.h> /* size_t */
#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
/* ZSTD_splitBlock():
* @level must be a value between 0 and 4.
* higher levels spend more energy to detect block boundaries.
* @workspace must be aligned for size_t.
* @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
* note:
* For the time being, this function only accepts full 128 KB blocks.
* Therefore, @blockSize must be == 128 KB.
* While this could be extended to smaller sizes in the future,
* it is not yet clear if this would be useful. TBD.
*/
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize);
#endif /* ZSTD_PRESPLIT_H */
+239 -175
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -15,16 +15,13 @@
#endif
/* ====== Constants ====== */
#define ZSTDMT_OVERLAPLOG_DEFAULT 0
/* ====== Dependencies ====== */
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
#include "../common/mem.h" /* MEM_STATIC */
#include "../common/pool.h" /* threadpool */
#include "../common/threading.h" /* mutex */
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
#include "zstd_ldm.h"
#include "zstdmt_compress.h"
@@ -43,12 +40,13 @@
# include <unistd.h>
# include <sys/times.h>
# define DEBUG_PRINTHEX(l,p,n) { \
unsigned debug_u; \
for (debug_u=0; debug_u<(n); debug_u++) \
RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
RAWLOG(l, " \n"); \
}
# define DEBUG_PRINTHEX(l,p,n) \
do { \
unsigned debug_u; \
for (debug_u=0; debug_u<(n); debug_u++) \
RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
RAWLOG(l, " \n"); \
} while (0)
static unsigned long long GetCurrentClockTimeMicroseconds(void)
{
@@ -60,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
} }
#define MUTEX_WAIT_TIME_DLEVEL 6
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
ZSTD_pthread_mutex_lock(mutex); \
{ unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
unsigned long long const elapsedTime = (afterTime-beforeTime); \
if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
elapsedTime, #mutex); \
} } \
} else { \
ZSTD_pthread_mutex_lock(mutex); \
} \
}
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) \
do { \
if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
ZSTD_pthread_mutex_lock(mutex); \
{ unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
unsigned long long const elapsedTime = (afterTime-beforeTime); \
if (elapsedTime > 1000) { \
/* or whatever threshold you like; I'm using 1 millisecond here */ \
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, \
"Thread took %llu microseconds to acquire mutex %s \n", \
elapsedTime, #mutex); \
} } \
} else { \
ZSTD_pthread_mutex_lock(mutex); \
} \
} while (0)
#else
# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
# define DEBUG_PRINTHEX(l,p,n) {}
# define DEBUG_PRINTHEX(l,p,n) do { } while (0)
#endif
@@ -89,9 +90,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
typedef struct buffer_s {
void* start;
size_t capacity;
} buffer_t;
} Buffer;
static const buffer_t g_nullBuffer = { NULL, 0 };
static const Buffer g_nullBuffer = { NULL, 0 };
typedef struct ZSTDMT_bufferPool_s {
ZSTD_pthread_mutex_t poolMutex;
@@ -99,18 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
unsigned totalBuffers;
unsigned nbBuffers;
ZSTD_customMem cMem;
buffer_t bTable[1]; /* variable size */
Buffer* buffers;
} ZSTDMT_bufferPool;
static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
{
DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
if (!bufPool) return; /* compatibility with free on NULL */
if (bufPool->buffers) {
unsigned u;
for (u=0; u<bufPool->totalBuffers; u++) {
DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
}
ZSTD_customFree(bufPool->buffers, bufPool->cMem);
}
ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
ZSTD_customFree(bufPool, bufPool->cMem);
}
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
{
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
ZSTDMT_bufferPool* const bufPool =
(ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
if (bufPool==NULL) return NULL;
if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
ZSTD_customFree(bufPool, cMem);
return NULL;
}
bufPool->buffers = (Buffer*)ZSTD_customCalloc(maxNbBuffers * sizeof(Buffer), cMem);
if (bufPool->buffers==NULL) {
ZSTDMT_freeBufferPool(bufPool);
return NULL;
}
bufPool->bufferSize = 64 KB;
bufPool->totalBuffers = maxNbBuffers;
bufPool->nbBuffers = 0;
@@ -118,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_cu
return bufPool;
}
static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
{
unsigned u;
DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
if (!bufPool) return; /* compatibility with free on NULL */
for (u=0; u<bufPool->totalBuffers; u++) {
DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
}
ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
ZSTD_customFree(bufPool, bufPool->cMem);
}
/* only works at initialization, not during compression */
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
{
size_t const poolSize = sizeof(*bufPool)
+ (bufPool->totalBuffers - 1) * sizeof(buffer_t);
size_t const poolSize = sizeof(*bufPool);
size_t const arraySize = bufPool->totalBuffers * sizeof(Buffer);
unsigned u;
size_t totalBufferSize = 0;
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
for (u=0; u<bufPool->totalBuffers; u++)
totalBufferSize += bufPool->bTable[u].capacity;
totalBufferSize += bufPool->buffers[u].capacity;
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return poolSize + totalBufferSize;
return poolSize + arraySize + totalBufferSize;
}
/* ZSTDMT_setBufferSize() :
@@ -180,15 +189,15 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
* assumption : bufPool must be valid
* @return : a buffer, with start pointer and size
* note: allocation may fail, in this case, start==NULL and size==0 */
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
static Buffer ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
{
size_t const bSize = bufPool->bufferSize;
DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers) { /* try to use an existing buffer */
buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
Buffer const buf = bufPool->buffers[--(bufPool->nbBuffers)];
size_t const availBufferSize = buf.capacity;
bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
/* large enough, but not too much */
DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -203,7 +212,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
/* create new buffer */
DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
{ buffer_t buffer;
{ Buffer buffer;
void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
buffer.start = start; /* note : start can be NULL if malloc fails ! */
buffer.capacity = (start==NULL) ? 0 : bSize;
@@ -222,12 +231,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
* @return : a buffer that is at least the buffer pool buffer size.
* If a reallocation happens, the data in the input buffer is copied.
*/
static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
static Buffer ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, Buffer buffer)
{
size_t const bSize = bufPool->bufferSize;
if (buffer.capacity < bSize) {
void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
buffer_t newBuffer;
Buffer newBuffer;
newBuffer.start = start;
newBuffer.capacity = start == NULL ? 0 : bSize;
if (start != NULL) {
@@ -243,20 +252,20 @@ static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
#endif
/* store buffer for later re-use, up to pool capacity */
static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, Buffer buf)
{
DEBUGLOG(5, "ZSTDMT_releaseBuffer");
if (buf.start == NULL) return; /* compatible with release on NULL */
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers < bufPool->totalBuffers) {
bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
bufPool->buffers[bufPool->nbBuffers++] = buf; /* stored for later use */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
(U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return;
}
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
/* Reached bufferPool capacity (should not happen) */
/* Reached bufferPool capacity (note: should not happen) */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
ZSTD_customFree(buf.start, bufPool->cMem);
}
@@ -266,11 +275,11 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
* 1 buffer for input loading
* 1 buffer for "next input" when submitting current one
* 1 buffer stuck in queue */
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) 2*nbWorkers + 3
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
* So we only need one seq buffer per worker. */
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) nbWorkers
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
/* ===== Seq Pool Wrapper ====== */
@@ -281,23 +290,23 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
return ZSTDMT_sizeof_bufferPool(seqPool);
}
static rawSeqStore_t bufferToSeq(buffer_t buffer)
static RawSeqStore_t bufferToSeq(Buffer buffer)
{
rawSeqStore_t seq = kNullRawSeqStore;
RawSeqStore_t seq = kNullRawSeqStore;
seq.seq = (rawSeq*)buffer.start;
seq.capacity = buffer.capacity / sizeof(rawSeq);
return seq;
}
static buffer_t seqToBuffer(rawSeqStore_t seq)
static Buffer seqToBuffer(RawSeqStore_t seq)
{
buffer_t buffer;
Buffer buffer;
buffer.start = seq.seq;
buffer.capacity = seq.capacity * sizeof(rawSeq);
return buffer;
}
static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
static RawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
{
if (seqPool->bufferSize == 0) {
return kNullRawSeqStore;
@@ -306,13 +315,13 @@ static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
}
#if ZSTD_RESIZE_SEQPOOL
static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
static RawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, RawSeqStore_t seq)
{
return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
}
#endif
static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, RawSeqStore_t seq)
{
ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
}
@@ -349,16 +358,20 @@ typedef struct {
int totalCCtx;
int availCCtx;
ZSTD_customMem cMem;
ZSTD_CCtx* cctx[1]; /* variable size */
ZSTD_CCtx** cctxs;
} ZSTDMT_CCtxPool;
/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{
int cid;
for (cid=0; cid<pool->totalCCtx; cid++)
ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
if (!pool) return;
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
if (pool->cctxs) {
int cid;
for (cid=0; cid<pool->totalCCtx; cid++)
ZSTD_freeCCtx(pool->cctxs[cid]); /* free compatible with NULL */
ZSTD_customFree(pool->cctxs, pool->cMem);
}
ZSTD_customFree(pool, pool->cMem);
}
@@ -367,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
ZSTD_customMem cMem)
{
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
ZSTDMT_CCtxPool* const cctxPool =
(ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
assert(nbWorkers > 0);
if (!cctxPool) return NULL;
if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
ZSTD_customFree(cctxPool, cMem);
return NULL;
}
cctxPool->cMem = cMem;
cctxPool->totalCCtx = nbWorkers;
cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
if (!cctxPool->cctxs) {
ZSTDMT_freeCCtxPool(cctxPool);
return NULL;
}
cctxPool->cMem = cMem;
cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
return cctxPool;
}
@@ -401,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
{
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
{ unsigned const nbWorkers = cctxPool->totalCCtx;
size_t const poolSize = sizeof(*cctxPool)
+ (nbWorkers-1) * sizeof(ZSTD_CCtx*);
unsigned u;
size_t const poolSize = sizeof(*cctxPool);
size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
size_t totalCCtxSize = 0;
unsigned u;
for (u=0; u<nbWorkers; u++) {
totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
}
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
assert(nbWorkers > 0);
return poolSize + totalCCtxSize;
return poolSize + arraySize + totalCCtxSize;
}
}
@@ -420,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
if (cctxPool->availCCtx) {
cctxPool->availCCtx--;
{ ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
{ ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
return cctx;
} }
@@ -434,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
if (cctx==NULL) return; /* compatibility with release on NULL */
ZSTD_pthread_mutex_lock(&pool->poolMutex);
if (pool->availCCtx < pool->totalCCtx)
pool->cctx[pool->availCCtx++] = cctx;
pool->cctxs[pool->availCCtx++] = cctx;
else {
/* pool overflow : should not happen, since totalCCtx==nbWorkers */
DEBUGLOG(4, "CCtx pool overflow : free cctx");
@@ -448,7 +466,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
typedef struct {
void const* start;
size_t size;
} range_t;
} Range;
typedef struct {
/* All variables in the struct are protected by mutex. */
@@ -464,10 +482,10 @@ typedef struct {
ZSTD_pthread_mutex_t ldmWindowMutex;
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
} serialState_t;
} SerialState;
static int
ZSTDMT_serialState_reset(serialState_t* serialState,
ZSTDMT_serialState_reset(SerialState* serialState,
ZSTDMT_seqPool* seqPool,
ZSTD_CCtx_params params,
size_t jobSize,
@@ -537,7 +555,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
return 0;
}
static int ZSTDMT_serialState_init(serialState_t* serialState)
static int ZSTDMT_serialState_init(SerialState* serialState)
{
int initError = 0;
ZSTD_memset(serialState, 0, sizeof(*serialState));
@@ -548,7 +566,7 @@ static int ZSTDMT_serialState_init(serialState_t* serialState)
return initError;
}
static void ZSTDMT_serialState_free(serialState_t* serialState)
static void ZSTDMT_serialState_free(SerialState* serialState)
{
ZSTD_customMem cMem = serialState->params.customMem;
ZSTD_pthread_mutex_destroy(&serialState->mutex);
@@ -559,9 +577,10 @@ static void ZSTDMT_serialState_free(serialState_t* serialState)
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
}
static void ZSTDMT_serialState_update(serialState_t* serialState,
ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore,
range_t src, unsigned jobID)
static void
ZSTDMT_serialState_genSequences(SerialState* serialState,
RawSeqStore_t* seqStore,
Range src, unsigned jobID)
{
/* Wait for our turn */
ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
@@ -574,12 +593,13 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
/* It is now our turn, do any processing necessary */
if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
size_t error;
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
seqStore.size == 0 && seqStore.capacity > 0);
DEBUGLOG(6, "ZSTDMT_serialState_genSequences: LDM update");
assert(seqStore->seq != NULL && seqStore->pos == 0 &&
seqStore->size == 0 && seqStore->capacity > 0);
assert(src.size <= serialState->params.jobSize);
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
error = ZSTD_ldm_generateSequences(
&serialState->ldmState, &seqStore,
&serialState->ldmState, seqStore,
&serialState->params.ldmParams, src.start, src.size);
/* We provide a large enough buffer to never fail. */
assert(!ZSTD_isError(error)); (void)error;
@@ -598,17 +618,22 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
serialState->nextJobID++;
ZSTD_pthread_cond_broadcast(&serialState->cond);
ZSTD_pthread_mutex_unlock(&serialState->mutex);
}
if (seqStore.size > 0) {
size_t const err = ZSTD_referenceExternalSequences(
jobCCtx, seqStore.seq, seqStore.size);
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
assert(!ZSTD_isError(err));
(void)err;
static void
ZSTDMT_serialState_applySequences(const SerialState* serialState, /* just for an assert() check */
ZSTD_CCtx* jobCCtx,
const RawSeqStore_t* seqStore)
{
if (seqStore->size > 0) {
DEBUGLOG(5, "ZSTDMT_serialState_applySequences: uploading %u external sequences", (unsigned)seqStore->size);
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable); (void)serialState;
assert(jobCCtx);
ZSTD_referenceExternalSequences(jobCCtx, seqStore->seq, seqStore->size);
}
}
static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
static void ZSTDMT_serialState_ensureFinished(SerialState* serialState,
unsigned jobID, size_t cSize)
{
ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
@@ -632,36 +657,37 @@ static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
/* ===== Worker thread ===== */
/* ------------------------------------------ */
static const range_t kNullRange = { NULL, 0 };
static const Range kNullRange = { NULL, 0 };
typedef struct {
size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */
ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */
ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */
serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */
buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */
range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */
unsigned jobID; /* set by mtctx, then read by worker => no barrier */
unsigned firstJob; /* set by mtctx, then read by worker => no barrier */
unsigned lastJob; /* set by mtctx, then read by worker => no barrier */
ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */
const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */
unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */
size_t dstFlushed; /* used only by mtctx */
unsigned frameChecksumNeeded; /* used only by mtctx */
size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */
ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */
ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */
ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */
SerialState* serial; /* Thread-safe - used by mtctx and (all) workers */
Buffer dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
Range prefix; /* set by mtctx, then read by worker & mtctx => no barrier */
Range src; /* set by mtctx, then read by worker & mtctx => no barrier */
unsigned jobID; /* set by mtctx, then read by worker => no barrier */
unsigned firstJob; /* set by mtctx, then read by worker => no barrier */
unsigned lastJob; /* set by mtctx, then read by worker => no barrier */
ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */
const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */
unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */
size_t dstFlushed; /* used only by mtctx */
unsigned frameChecksumNeeded; /* used only by mtctx */
} ZSTDMT_jobDescription;
#define JOB_ERROR(e) { \
ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
job->cSize = e; \
ZSTD_pthread_mutex_unlock(&job->job_mutex); \
goto _endJob; \
}
#define JOB_ERROR(e) \
do { \
ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
job->cSize = e; \
ZSTD_pthread_mutex_unlock(&job->job_mutex); \
goto _endJob; \
} while (0)
/* ZSTDMT_compressionJob() is a POOL_function type */
static void ZSTDMT_compressionJob(void* jobDescription)
@@ -669,10 +695,11 @@ static void ZSTDMT_compressionJob(void* jobDescription)
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
buffer_t dstBuff = job->dstBuff;
RawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
Buffer dstBuff = job->dstBuff;
size_t lastCBlockSize = 0;
DEBUGLOG(5, "ZSTDMT_compressionJob: job %u", job->jobID);
/* resources */
if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
@@ -694,11 +721,15 @@ static void ZSTDMT_compressionJob(void* jobDescription)
/* init */
/* Perform serial step as early as possible */
ZSTDMT_serialState_genSequences(job->serial, &rawSeqStore, job->src, job->jobID);
if (job->cdict) {
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} else { /* srcStart points at reloaded section */
} else {
U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
@@ -707,25 +738,26 @@ static void ZSTDMT_compressionJob(void* jobDescription)
size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
if (ZSTD_isError(err)) JOB_ERROR(err);
}
DEBUGLOG(6, "ZSTDMT_compressionJob: job %u: loading prefix of size %zu", job->jobID, job->prefix.size);
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent,
ZSTD_dtlm_fast,
NULL, /*cdict*/
&jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} }
/* Perform serial step as early as possible, but after CCtx initialization */
ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
/* External Sequences can only be applied after CCtx initialization */
ZSTDMT_serialState_applySequences(job->serial, cctx, &rawSeqStore);
if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
ZSTD_invalidateRepCodes(cctx);
}
/* compress */
/* compress the entire job by smaller chunks, for better granularity */
{ size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX;
int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize);
const BYTE* ip = (const BYTE*) job->src.start;
@@ -737,7 +769,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
assert(job->cSize == 0);
for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
ip += chunkSize;
op += cSize; assert(op < oend);
@@ -757,8 +789,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
size_t const cSize = (job->lastJob) ?
ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
@@ -793,10 +825,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
/* ------------------------------------------ */
typedef struct {
range_t prefix; /* read-only non-owned prefix buffer */
buffer_t buffer;
Range prefix; /* read-only non-owned prefix buffer */
Buffer buffer;
size_t filled;
} inBuff_t;
} InBuff_t;
typedef struct {
BYTE* buffer; /* The round input buffer. All jobs get references
@@ -810,9 +842,9 @@ typedef struct {
* the inBuff is sent to the worker thread.
* pos <= capacity.
*/
} roundBuff_t;
} RoundBuff_t;
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
static const RoundBuff_t kNullRoundBuff = {NULL, 0, 0};
#define RSYNC_LENGTH 32
/* Don't create chunks smaller than the zstd block size.
@@ -829,7 +861,7 @@ typedef struct {
U64 hash;
U64 hitMask;
U64 primePower;
} rsyncState_t;
} RSyncState_t;
struct ZSTDMT_CCtx_s {
POOL_ctx* factory;
@@ -841,10 +873,10 @@ struct ZSTDMT_CCtx_s {
size_t targetSectionSize;
size_t targetPrefixSize;
int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
inBuff_t inBuff;
roundBuff_t roundBuff;
serialState_t serial;
rsyncState_t rsync;
InBuff_t inBuff;
RoundBuff_t roundBuff;
SerialState serial;
RSyncState_t rsync;
unsigned jobIDMask;
unsigned doneJobID;
unsigned nextJobID;
@@ -1090,7 +1122,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
{ unsigned jobNb;
unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
mtctx->doneJobID, lastJobNb, mtctx->jobReady)
mtctx->doneJobID, lastJobNb, mtctx->jobReady);
for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
unsigned const wJobID = jobNb & mtctx->jobIDMask;
ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
@@ -1229,13 +1261,11 @@ size_t ZSTDMT_initCStream_internal(
/* init */
if (params.nbWorkers != mtctx->params.nbWorkers)
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, (unsigned)params.nbWorkers) , "");
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
ZSTDMT_waitForAllJobsCompleted(mtctx);
ZSTDMT_releaseAllJobResources(mtctx);
@@ -1244,15 +1274,14 @@ size_t ZSTDMT_initCStream_internal(
mtctx->params = params;
mtctx->frameContentSize = pledgedSrcSize;
ZSTD_freeCDict(mtctx->cdictLocal);
if (dict) {
ZSTD_freeCDict(mtctx->cdictLocal);
mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
params.cParams, mtctx->cMem);
mtctx->cdict = mtctx->cdictLocal;
if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
} else {
ZSTD_freeCDict(mtctx->cdictLocal);
mtctx->cdictLocal = NULL;
mtctx->cdict = cdict;
}
@@ -1318,9 +1347,32 @@ size_t ZSTDMT_initCStream_internal(
mtctx->allJobsCompleted = 0;
mtctx->consumed = 0;
mtctx->produced = 0;
/* update dictionary */
ZSTD_freeCDict(mtctx->cdictLocal);
mtctx->cdictLocal = NULL;
mtctx->cdict = NULL;
if (dict) {
if (dictContentType == ZSTD_dct_rawContent) {
mtctx->inBuff.prefix.start = (const BYTE*)dict;
mtctx->inBuff.prefix.size = dictSize;
} else {
/* note : a loadPrefix becomes an internal CDict */
mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_dlm_byRef, dictContentType,
params.cParams, mtctx->cMem);
mtctx->cdict = mtctx->cdictLocal;
if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
}
} else {
mtctx->cdict = cdict;
}
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
dict, dictSize, dictContentType))
return ERROR(memory_allocation);
return 0;
}
@@ -1387,7 +1439,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
mtctx->roundBuff.pos += srcSize;
mtctx->inBuff.buffer = g_nullBuffer;
mtctx->inBuff.filled = 0;
/* Set the prefix */
/* Set the prefix for next job */
if (!endFrame) {
size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize);
mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize;
@@ -1524,12 +1576,17 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
* If the data of the first job is broken up into two segments, we cover both
* sections.
*/
static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
static Range ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
{
unsigned const firstJobID = mtctx->doneJobID;
unsigned const lastJobID = mtctx->nextJobID;
unsigned jobID;
/* no need to check during first round */
size_t roundBuffCapacity = mtctx->roundBuff.capacity;
size_t nbJobs1stRoundMin = roundBuffCapacity / mtctx->targetSectionSize;
if (lastJobID < nbJobs1stRoundMin) return kNullRange;
for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
unsigned const wJobID = jobID & mtctx->jobIDMask;
size_t consumed;
@@ -1539,7 +1596,7 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
if (consumed < mtctx->jobs[wJobID].src.size) {
range_t range = mtctx->jobs[wJobID].prefix;
Range range = mtctx->jobs[wJobID].prefix;
if (range.size == 0) {
/* Empty prefix */
range = mtctx->jobs[wJobID].src;
@@ -1555,7 +1612,7 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
/**
* Returns non-zero iff buffer and range overlap.
*/
static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
static int ZSTDMT_isOverlapped(Buffer buffer, Range range)
{
BYTE const* const bufferStart = (BYTE const*)buffer.start;
BYTE const* const rangeStart = (BYTE const*)range.start;
@@ -1575,10 +1632,10 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
}
}
static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
static int ZSTDMT_doesOverlapWindow(Buffer buffer, ZSTD_window_t window)
{
range_t extDict;
range_t prefix;
Range extDict;
Range prefix;
DEBUGLOG(5, "ZSTDMT_doesOverlapWindow");
extDict.start = window.dictBase + window.lowLimit;
@@ -1597,7 +1654,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
|| ZSTDMT_isOverlapped(buffer, prefix);
}
static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, Buffer buffer)
{
if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
@@ -1622,16 +1679,16 @@ static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
*/
static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
{
range_t const inUse = ZSTDMT_getInputDataInUse(mtctx);
Range const inUse = ZSTDMT_getInputDataInUse(mtctx);
size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos;
size_t const target = mtctx->targetSectionSize;
buffer_t buffer;
size_t const spaceNeeded = mtctx->targetSectionSize;
Buffer buffer;
DEBUGLOG(5, "ZSTDMT_tryGetInputRange");
assert(mtctx->inBuff.buffer.start == NULL);
assert(mtctx->roundBuff.capacity >= target);
assert(mtctx->roundBuff.capacity >= spaceNeeded);
if (spaceLeft < target) {
if (spaceLeft < spaceNeeded) {
/* ZSTD_invalidateRepCodes() doesn't work for extDict variants.
* Simply copy the prefix to the beginning in that case.
*/
@@ -1650,7 +1707,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
mtctx->roundBuff.pos = prefixSize;
}
buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos;
buffer.capacity = target;
buffer.capacity = spaceNeeded;
if (ZSTDMT_isOverlapped(buffer, inUse)) {
DEBUGLOG(5, "Waiting for buffer...");
@@ -1677,7 +1734,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
typedef struct {
size_t toLoad; /* The number of bytes to load from the input. */
int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
} syncPoint_t;
} SyncPoint;
/**
* Searches through the input for a synchronization point. If one is found, we
@@ -1685,14 +1742,14 @@ typedef struct {
* Otherwise, we will load as many bytes as possible and instruct the caller
* to continue as normal.
*/
static syncPoint_t
static SyncPoint
findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
{
BYTE const* const istart = (BYTE const*)input.src + input.pos;
U64 const primePower = mtctx->rsync.primePower;
U64 const hitMask = mtctx->rsync.hitMask;
syncPoint_t syncPoint;
SyncPoint syncPoint;
U64 hash;
BYTE const* prev;
size_t pos;
@@ -1734,7 +1791,7 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
}
} else {
/* We have enough bytes buffered to initialize the hash,
* and are have processed enough bytes to find a sync point.
* and have processed enough bytes to find a sync point.
* Start scanning at the beginning of the input.
*/
assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
@@ -1761,17 +1818,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
* then a block will be emitted anyways, but this is okay, since if we
* are already synchronized we will remain synchronized.
*/
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
for (; pos < syncPoint.toLoad; ++pos) {
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
/* This assert is very expensive, and Debian compiles with asserts enabled.
* So disable it for now. We can get similar coverage by checking it at the
* beginning & end of the loop.
* assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
*/
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
if ((hash & hitMask) == hitMask) {
syncPoint.toLoad = pos + 1;
syncPoint.flush = 1;
++pos; /* for assert */
break;
}
}
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
return syncPoint;
}
@@ -1817,7 +1881,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
}
if (mtctx->inBuff.buffer.start != NULL) {
syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
SyncPoint const syncPoint = findSynchronizationPoint(mtctx, *input);
if (syncPoint.flush && endOp == ZSTD_e_continue) {
endOp = ZSTD_e_flush;
}
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,10 +11,10 @@
#ifndef ZSTDMT_COMPRESS_H
#define ZSTDMT_COMPRESS_H
#if defined (__cplusplus)
extern "C" {
#endif
/* === Dependencies === */
#include "../common/zstd_deps.h" /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* Note : This is an internal API.
* These APIs used to be exposed with ZSTDLIB_API,
@@ -25,12 +25,6 @@
* otherwise ZSTDMT_createCCtx*() will fail.
*/
/* === Dependencies === */
#include "../common/zstd_deps.h" /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* === Constants === */
#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
@@ -105,9 +99,4 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
*/
ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTDMT_COMPRESS_H */
File diff suppressed because it is too large Load Diff
@@ -1,5 +1,5 @@
/*
* Copyright (c) Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -10,34 +10,53 @@
#include "../common/portability_macros.h"
#if defined(__ELF__) && defined(__GNUC__)
/* Stack marking
* ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
*/
#if defined(__ELF__) && defined(__GNUC__)
.section .note.GNU-stack,"",%progbits
#if defined(__aarch64__)
/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
* See: https://github.com/facebook/zstd/issues/3841
* See: https://gcc.godbolt.org/z/sqr5T4ffK
* See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
* See: https://reviews.llvm.org/D62609
*/
.pushsection .note.gnu.property, "a"
.p2align 3
.long 4 /* size of the name - "GNU\0" */
.long 0x10 /* size of descriptor */
.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */
.asciz "GNU"
.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
.long 4 /* pr_datasz - 4 bytes */
.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
.p2align 3 /* pr_padding - bring everything to 8 byte alignment */
.popsection
#endif
#endif
#if ZSTD_ENABLE_ASM_X86_64_BMI2
/* Calling convention:
*
* %rdi contains the first argument: HUF_DecompressAsmArgs*.
* %rdi (or %rcx on Windows) contains the first argument: HUF_DecompressAsmArgs*.
* %rbp isn't maintained (no frame pointer).
* %rsp contains the stack pointer that grows down.
* No red-zone is assumed, only addresses >= %rsp are used.
* All register contents are preserved.
*
* TODO: Support Windows calling convention.
*/
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
.global HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
.global HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
.global _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
.global _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
.global HUF_decompress4X1_usingDTable_internal_fast_asm_loop
.global HUF_decompress4X2_usingDTable_internal_fast_asm_loop
.global _HUF_decompress4X1_usingDTable_internal_fast_asm_loop
.global _HUF_decompress4X2_usingDTable_internal_fast_asm_loop
.text
/* Sets up register mappings for clarity.
@@ -95,8 +114,9 @@ ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
/* Define both _HUF_* & HUF_* symbols because MacOS
* C symbols are prefixed with '_' & Linux symbols aren't.
*/
_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
_HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
ZSTD_CET_ENDBRANCH
/* Save all registers - even if they are callee saved for simplicity. */
push %rax
push %rbx
@@ -115,7 +135,11 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
push %r15
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2
@@ -130,7 +154,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
movq 88(%rax), %bits3
movq 96(%rax), %dtable
push %rax /* argument */
push 104(%rax) /* ilimit */
push 104(%rax) /* ilowest */
push 112(%rax) /* oend */
push %olimit /* olimit space */
@@ -155,11 +179,11 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
shrq $2, %r15
movq %ip0, %rax /* rax = ip0 */
movq 40(%rsp), %rdx /* rdx = ilimit */
subq %rdx, %rax /* rax = ip0 - ilimit */
movq %rax, %rbx /* rbx = ip0 - ilimit */
movq 40(%rsp), %rdx /* rdx = ilowest */
subq %rdx, %rax /* rax = ip0 - ilowest */
movq %rax, %rbx /* rbx = ip0 - ilowest */
/* rdx = (ip0 - ilimit) / 7 */
/* rdx = (ip0 - ilowest) / 7 */
movabsq $2635249153387078803, %rdx
mulq %rdx
subq %rdx, %rbx
@@ -182,9 +206,8 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
/* If (op3 + 20 > olimit) */
movq %op3, %rax /* rax = op3 */
addq $20, %rax /* rax = op3 + 20 */
cmpq %rax, %olimit /* op3 + 20 > olimit */
jb .L_4X1_exit
cmpq %rax, %olimit /* op3 == olimit */
je .L_4X1_exit
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
@@ -315,7 +338,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
/* Restore stack (oend & olimit) */
pop %rax /* olimit */
pop %rax /* oend */
pop %rax /* ilimit */
pop %rax /* ilowest */
pop %rax /* arg */
/* Save ip / op / bits */
@@ -350,8 +373,9 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
pop %rax
ret
_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
_HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
ZSTD_CET_ENDBRANCH
/* Save all registers - even if they are callee saved for simplicity. */
push %rax
push %rbx
@@ -369,7 +393,12 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
push %r14
push %r15
/* Read HUF_DecompressAsmArgs* args from %rax */
#if defined(_WIN32)
movq %rcx, %rax
#else
movq %rdi, %rax
#endif
movq 0(%rax), %ip0
movq 8(%rax), %ip1
movq 16(%rax), %ip2
@@ -385,7 +414,7 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
movq 96(%rax), %dtable
push %rax /* argument */
push %rax /* olimit */
push 104(%rax) /* ilimit */
push 104(%rax) /* ilowest */
movq 112(%rax), %rax
push %rax /* oend3 */
@@ -412,9 +441,9 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
/* We can consume up to 7 input bytes each iteration. */
movq %ip0, %rax /* rax = ip0 */
movq 40(%rsp), %rdx /* rdx = ilimit */
subq %rdx, %rax /* rax = ip0 - ilimit */
movq %rax, %r15 /* r15 = ip0 - ilimit */
movq 40(%rsp), %rdx /* rdx = ilowest */
subq %rdx, %rax /* rax = ip0 - ilowest */
movq %rax, %r15 /* r15 = ip0 - ilowest */
/* rdx = rax / 7 */
movabsq $2635249153387078803, %rdx
@@ -424,44 +453,33 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
addq %r15, %rdx
shrq $2, %rdx
/* r15 = (ip0 - ilimit) / 7 */
/* r15 = (ip0 - ilowest) / 7 */
movq %rdx, %r15
movabsq $-3689348814741910323, %rdx
movq 8(%rsp), %rax /* rax = oend0 */
subq %op0, %rax /* rax = oend0 - op0 */
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
/* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
movq 8(%rsp), %rax /* rax = oend0 */
subq %op0, %rax /* rax = oend0 - op0 */
movq 16(%rsp), %rdx /* rdx = oend1 */
subq %op1, %rdx /* rdx = oend1 - op1 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movabsq $-3689348814741910323, %rdx
movq 16(%rsp), %rax /* rax = oend1 */
subq %op1, %rax /* rax = oend1 - op1 */
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
movabsq $-3689348814741910323, %rdx
movq 24(%rsp), %rax /* rax = oend2 */
subq %op2, %rax /* rax = oend2 - op2 */
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movabsq $-3689348814741910323, %rdx
movq 32(%rsp), %rax /* rax = oend3 */
subq %op3, %rax /* rax = oend3 - op3 */
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movabsq $-3689348814741910323, %rax
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
shrq $3, %rdx /* rdx = rdx / 10 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
@@ -476,9 +494,8 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
/* If (op3 + 10 > olimit) */
movq %op3, %rax /* rax = op3 */
addq $10, %rax /* rax = op3 + 10 */
cmpq %rax, %olimit /* op3 + 10 > olimit */
jb .L_4X2_exit
cmpq %rax, %olimit /* op3 == olimit */
je .L_4X2_exit
/* If (ip1 < ip0) go to exit */
cmpq %ip0, %ip1
@@ -546,7 +563,7 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
pop %rax /* oend1 */
pop %rax /* oend2 */
pop %rax /* oend3 */
pop %rax /* ilimit */
pop %rax /* ilowest */
pop %rax /* olimit */
pop %rax /* arg */
+4 -4
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -14,12 +14,12 @@
/*-*******************************************************
* Dependencies
*********************************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "zstd_decompress_internal.h"
#include "zstd_ddict.h"
@@ -134,7 +134,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
ZSTD_memcpy(internalBuffer, dict, dictSize);
}
ddict->dictSize = dictSize;
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
@@ -240,5 +240,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0;
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
return ddict->dictID;
}
+1 -1
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
+294 -114
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -56,13 +56,15 @@
* Dependencies
*********************************************************/
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/error_private.h"
#include "../common/zstd_internal.h" /* blockProperties_t */
#include "../common/mem.h" /* low level memory routines */
#include "../common/bits.h" /* ZSTD_highbit32 */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
#include "../common/zstd_internal.h" /* blockProperties_t */
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
@@ -78,11 +80,11 @@
*************************************/
#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
* Currently, that means a 0.75 load factor.
* So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
* the load factor of the ddict hash set.
*/
#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
* Currently, that means a 0.75 load factor.
* So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
* the load factor of the ddict hash set.
*/
#define DDICT_HASHSET_TABLE_BASE_SIZE 64
#define DDICT_HASHSET_RESIZE_FACTOR 2
@@ -243,6 +245,8 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
dctx->outBufferMode = ZSTD_bm_buffered;
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
dctx->disableHufAsm = 0;
dctx->maxBlockSizeParam = 0;
}
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@@ -263,6 +267,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
#endif
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
dctx->isFrameDecompression = 1;
#if DYNAMIC_BMI2
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif
@@ -438,16 +443,40 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
** or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
{
const BYTE* ip = (const BYTE*)src;
size_t const minInputSize = ZSTD_startingInputLength(format);
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
if (srcSize < minInputSize) return minInputSize;
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
if (srcSize > 0) {
/* note : technically could be considered an assert(), since it's an invalid entry */
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
}
if (srcSize < minInputSize) {
if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
/* when receiving less than @minInputSize bytes,
* control these bytes at least correspond to a supported magic number
* in order to error out early if they don't.
**/
size_t const toCopy = MIN(4, srcSize);
unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER);
assert(src != NULL);
ZSTD_memcpy(hbuf, src, toCopy);
if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) {
/* not a zstd frame : let's check if it's a skippable frame */
MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START);
ZSTD_memcpy(hbuf, src, toCopy);
if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) {
RETURN_ERROR(prefix_unknown,
"first bytes don't correspond to any supported magic number");
} } }
return minInputSize;
}
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */
if ( (format != ZSTD_f_zstd1_magicless)
&& (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -455,8 +484,10 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
zfhPtr->frameType = ZSTD_skippableFrame;
zfhPtr->dictID = MEM_readLE32(src) - ZSTD_MAGIC_SKIPPABLE_START;
zfhPtr->headerSize = ZSTD_SKIPPABLEHEADERSIZE;
zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
return 0;
}
RETURN_ERROR(prefix_unknown, "");
@@ -525,7 +556,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize)
{
return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
}
@@ -543,7 +574,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
}
#endif
{ ZSTD_frameHeader zfh;
{ ZSTD_FrameHeader zfh;
if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
return ZSTD_CONTENTSIZE_ERROR;
if (zfh.frameType == ZSTD_skippableFrame) {
@@ -563,49 +594,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported, "");
{
size_t const skippableSize = skippableHeaderSize + sizeU32;
{ size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
return skippableSize;
}
}
/*! ZSTD_readSkippableFrame() :
* Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
* Retrieves content of a skippable frame, and writes it to dst buffer.
*
* The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
* in the magicVariant.
*
* Returns an error if destination buffer is not large enough, or if the frame is not skippable.
* Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
*
* @return : number of bytes written or a ZSTD error.
*/
ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
const void* src, size_t srcSize)
size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
unsigned* magicVariant, /* optional, can be NULL */
const void* src, size_t srcSize)
{
U32 const magicNumber = MEM_readLE32(src);
size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
/* check input validity */
RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
{ U32 const magicNumber = MEM_readLE32(src);
size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
/* deliver payload */
if (skippableContentSize > 0 && dst != NULL)
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
if (magicVariant != NULL)
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
return skippableContentSize;
/* check input validity */
RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
/* deliver payload */
if (skippableContentSize > 0 && dst != NULL)
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
if (magicVariant != NULL)
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
return skippableContentSize;
}
}
/** ZSTD_findDecompressedSize() :
* compatible with legacy mode
* `srcSize` must be the exact length of some number of ZSTD compressed and/or
* skippable frames
* @return : decompressed size of the frames contained */
* note: compatible with legacy mode
* @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
unsigned long long totalDstSize = 0;
@@ -615,9 +649,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
@@ -625,17 +657,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
continue;
}
{ unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
{ unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
/* check for overflow */
if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
totalDstSize += ret;
if (totalDstSize + fcs < totalDstSize)
return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
totalDstSize += fcs;
}
/* skip to next frame */
{ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
if (ZSTD_isError(frameSrcSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
assert(frameSrcSize <= srcSize);
src = (const BYTE *)src + frameSrcSize;
srcSize -= frameSrcSize;
@@ -699,17 +731,17 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
return frameSizeInfo;
}
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format)
{
ZSTD_frameSizeInfo frameSizeInfo;
ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize))
if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize))
return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
#endif
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
@@ -720,10 +752,10 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
const BYTE* const ipstart = ip;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
ZSTD_frameHeader zfh;
ZSTD_FrameHeader zfh;
/* Extract Frame Header */
{ size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
{ size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
if (ZSTD_isError(ret))
return ZSTD_errorFrameSizeInfo(ret);
if (ret > 0)
@@ -757,28 +789,31 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
ip += 4;
}
frameSizeInfo.nbBlocks = nbBlocks;
frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
? zfh.frameContentSize
: nbBlocks * zfh.blockSizeMax;
: (unsigned long long)nbBlocks * zfh.blockSizeMax;
return frameSizeInfo;
}
}
static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);
return frameSizeInfo.compressedSize;
}
/** ZSTD_findFrameCompressedSize() :
* compatible with legacy mode
* `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
* `srcSize` must be at least as large as the frame contained
* @return : the compressed size of the frame starting at `src` */
* See docs in zstd.h
* Note: compatible with legacy mode */
size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
{
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
return frameSizeInfo.compressedSize;
return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
}
/** ZSTD_decompressBound() :
* compatible with legacy mode
* `src` must point to the start of a ZSTD frame or a skippeable frame
* `src` must point to the start of a ZSTD frame or a skippable frame
* `srcSize` must be at least as large as the frame contained
* @return : the maximum decompressed size of the compressed source
*/
@@ -787,7 +822,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
unsigned long long bound = 0;
/* Iterate over each frame */
while (srcSize > 0) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
@@ -800,6 +835,48 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
return bound;
}
size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
{
size_t margin = 0;
unsigned maxBlockSize = 0;
/* Iterate over each frame */
while (srcSize > 0) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
ZSTD_FrameHeader zfh;
FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ERROR(corruption_detected);
if (zfh.frameType == ZSTD_frame) {
/* Add the frame header to our margin */
margin += zfh.headerSize;
/* Add the checksum to our margin */
margin += zfh.checksumFlag ? 4 : 0;
/* Add 3 bytes per block */
margin += 3 * frameSizeInfo.nbBlocks;
/* Compute the max block size */
maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
} else {
assert(zfh.frameType == ZSTD_skippableFrame);
/* Add the entire skippable frame size to our margin. */
margin += compressedSize;
}
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
}
/* Add the max block size back to the margin. */
margin += maxBlockSize;
return margin;
}
/*-*************************************************************
* Frame decoding
@@ -825,7 +902,7 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
if (srcSize == 0) return 0;
RETURN_ERROR(dstBuffer_null, "");
}
ZSTD_memcpy(dst, src, srcSize);
ZSTD_memmove(dst, src, srcSize);
return srcSize;
}
@@ -842,7 +919,7 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
return regenSize;
}
static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, int streaming)
{
#if ZSTD_TRACE
if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
@@ -901,8 +978,13 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
}
/* Shrink the blockSizeMax if enabled */
if (dctx->maxBlockSizeParam != 0)
dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
/* Loop on each block */
while (1) {
BYTE* oBlockEnd = oend;
size_t decodedSize;
blockProperties_t blockProperties;
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
@@ -912,27 +994,48 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
remainingSrcSize -= ZSTD_blockHeaderSize;
RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
if (ip >= op && ip < oBlockEnd) {
/* We are decompressing in-place. Limit the output pointer so that we
* don't overwrite the block that we are currently reading. This will
* fail decompression if the input & output pointers aren't spaced
* far enough apart.
*
* This is important to set, even when the pointers are far enough
* apart, because ZSTD_decompressBlock_internal() can decide to store
* literals in the output buffer, after the block it is decompressing.
* Since we don't want anything to overwrite our input, we have to tell
* ZSTD_decompressBlock_internal to never write past ip.
*
* See ZSTD_allocateLiteralsBuffer() for reference.
*/
oBlockEnd = op + (ip - op);
}
switch(blockProperties.blockType)
{
case bt_compressed:
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
assert(dctx->isFrameDecompression == 1);
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
break;
case bt_raw :
/* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
break;
case bt_rle :
decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize);
break;
case bt_reserved :
default:
RETURN_ERROR(corruption_detected, "invalid block type");
}
if (ZSTD_isError(decodedSize)) return decodedSize;
if (dctx->validateChecksum)
FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
if (dctx->validateChecksum) {
XXH64_update(&dctx->xxhState, op, decodedSize);
if (decodedSize != 0)
}
if (decodedSize) /* support dst = NULL,0 */ {
op += decodedSize;
}
assert(ip != NULL);
ip += cBlockSize;
remainingSrcSize -= cBlockSize;
@@ -956,12 +1059,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
}
ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
/* Allow caller to get size read */
DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %i, consuming %i bytes of input", (int)(op-ostart), (int)(ip - (const BYTE*)*srcPtr));
*srcPtr = ip;
*srcSizePtr = remainingSrcSize;
return (size_t)(op-ostart);
}
static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
@@ -981,7 +1087,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
size_t decodedSize;
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
if (ZSTD_isError(frameSize)) return frameSize;
@@ -991,6 +1097,15 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
if (ZSTD_isError(decodedSize)) return decodedSize;
{
unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize);
RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!");
if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) {
RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected,
"Frame header size does not match decoded size!");
}
}
assert(decodedSize <= dstCapacity);
dst = (BYTE*)dst + decodedSize;
dstCapacity -= decodedSize;
@@ -1002,17 +1117,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
#endif
{ U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
/* skippable frame detected : skip it */
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
continue;
continue; /* check next frame */
} }
if (ddict) {
@@ -1108,8 +1224,8 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
/**
* Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
* we allow taking a partial block as the input. Currently only raw uncompressed blocks can
* Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we
* allow taking a partial block as the input. Currently only raw uncompressed blocks can
* be streamed.
*
* For blocks that can be streamed, this allows us to reduce the latency until we produce
@@ -1228,7 +1344,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{
case bt_compressed:
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
assert(dctx->isFrameDecompression == 1);
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
dctx->expected = 0; /* Streaming not supported */
break;
case bt_raw :
@@ -1297,6 +1414,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decodeSkippableHeader:
assert(src != NULL);
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
assert(dctx->format != ZSTD_f_zstd1_magicless);
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
dctx->stage = ZSTDds_skipFrame;
@@ -1309,7 +1427,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
default:
assert(0); /* impossible */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
}
}
@@ -1350,11 +1468,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
/* in minimal huffman, we always use X1 variants */
size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
dictPtr, dictEnd - dictPtr,
workspace, workspaceSize);
workspace, workspaceSize, /* flags */ 0);
#else
size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
dictPtr, (size_t)(dictEnd - dictPtr),
workspace, workspaceSize);
workspace, workspaceSize, /* flags */ 0);
#endif
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
dictPtr += hSize;
@@ -1453,10 +1571,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->prefixStart = NULL;
dctx->virtualStart = NULL;
dctx->dictEnd = NULL;
dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0;
dctx->bType = bt_reserved;
dctx->isFrameDecompression = 1;
ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->entropy.LLTable;
@@ -1515,7 +1634,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
* This could for one of the following reasons :
* - The frame does not require a dictionary (most common case).
* - The frame was built with dictID intentionally removed.
* Needed dictionary is a hidden information.
* Needed dictionary is a hidden piece of information.
* Note : this use case also happens when using a non-conformant dictionary.
* - `srcSize` is too small, and as a result, frame header could not be decoded.
* Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
@@ -1524,7 +1643,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
* ZSTD_getFrameHeader(), which will provide a more precise error code. */
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
{
ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
ZSTD_FrameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
if (ZSTD_isError(hError)) return 0;
return zfp.dictID;
@@ -1631,7 +1750,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
size_t ZSTD_initDStream(ZSTD_DStream* zds)
{
DEBUGLOG(4, "ZSTD_initDStream");
return ZSTD_initDStream_usingDDict(zds, NULL);
FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), "");
FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), "");
return ZSTD_startingInputLength(zds->format);
}
/* ZSTD_initDStream_usingDDict() :
@@ -1639,6 +1760,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
* this function cannot fail */
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
return ZSTD_startingInputLength(dctx->format);
@@ -1649,6 +1771,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
* this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
DEBUGLOG(4, "ZSTD_resetDStream");
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
return ZSTD_startingInputLength(dctx->format);
}
@@ -1720,6 +1843,15 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
return bounds;
case ZSTD_d_disableHuffmanAssembly:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_d_maxBlockSize:
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
return bounds;
default:;
}
bounds.error = ERROR(parameter_unsupported);
@@ -1760,6 +1892,12 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
case ZSTD_d_refMultipleDDicts:
*value = (int)dctx->refMultipleDDicts;
return 0;
case ZSTD_d_disableHuffmanAssembly:
*value = (int)dctx->disableHufAsm;
return 0;
case ZSTD_d_maxBlockSize:
*value = dctx->maxBlockSizeParam;
return 0;
default:;
}
RETURN_ERROR(parameter_unsupported, "");
@@ -1793,6 +1931,14 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
}
dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
return 0;
case ZSTD_d_disableHuffmanAssembly:
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
dctx->disableHufAsm = value != 0;
return 0;
case ZSTD_d_maxBlockSize:
if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
dctx->maxBlockSizeParam = value;
return 0;
default:;
}
RETURN_ERROR(parameter_unsupported, "");
@@ -1804,6 +1950,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
|| (reset == ZSTD_reset_session_and_parameters) ) {
dctx->streamStage = zdss_init;
dctx->noForwardProgress = 0;
dctx->isFrameDecompression = 1;
}
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
@@ -1820,11 +1967,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
return ZSTD_sizeof_DCtx(dctx);
}
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
{
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
/* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
/* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
* ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
* the block at the beginning of the output buffer, and maintain a full window.
*
* We need another blockSize worth of buffer so that we can store split
* literals at the end of the block without overwriting the extDict window.
*/
unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
size_t const minRBSize = (size_t) neededSize;
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
@@ -1832,6 +1985,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
return minRBSize;
}
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);
}
size_t ZSTD_estimateDStreamSize(size_t windowSize)
{
size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
@@ -1843,7 +2001,7 @@ size_t ZSTD_estimateDStreamSize(size_t windowSize)
size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
{
U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
ZSTD_frameHeader zfh;
ZSTD_FrameHeader zfh;
size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
if (ZSTD_isError(err)) return err;
RETURN_ERROR_IF(err>0, srcSize_wrong, "");
@@ -1938,6 +2096,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
U32 someMoreWork = 1;
DEBUGLOG(5, "ZSTD_decompressStream");
assert(zds != NULL);
RETURN_ERROR_IF(
input->pos > input->size,
srcSize_wrong,
@@ -1980,7 +2139,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (zds->refMultipleDDicts && zds->ddictSet) {
ZSTD_DCtx_selectFrameDDict(zds);
}
DEBUGLOG(5, "header size : %u", (U32)hSize);
if (ZSTD_isError(hSize)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
@@ -2012,6 +2170,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize += remainingInput;
}
input->pos = input->size;
/* check first few bytes */
FORWARD_IF_ERROR(
ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format),
"First few bytes detected incorrect" );
/* return hint input size */
return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
}
assert(ip != NULL);
@@ -2023,14 +2186,15 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
&& zds->fParams.frameType != ZSTD_skippableFrame
&& (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format);
if (cSize <= (size_t)(iend-istart)) {
/* shortcut : using single-pass mode */
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
if (ZSTD_isError(decompressedSize)) return decompressedSize;
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
assert(istart != NULL);
ip = istart + cSize;
op += decompressedSize;
op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
zds->expected = 0;
zds->streamStage = zdss_init;
someMoreWork = 0;
@@ -2049,7 +2213,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
DEBUGLOG(4, "Consume header");
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
if (zds->format == ZSTD_f_zstd1
&& (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
zds->stage = ZSTDds_skipFrame;
} else {
@@ -2065,11 +2230,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
frameParameter_windowTooLarge, "");
if (zds->maxBlockSizeParam != 0)
zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
/* Adapt buffer sizes to frame header instructions */
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
: 0;
ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
@@ -2114,6 +2281,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
}
if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
assert(ip != NULL);
ip += neededInSize;
/* Function modifies the stage so we must break */
break;
@@ -2128,7 +2296,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
int const isSkipFrame = ZSTD_isSkipFrame(zds);
size_t loadedSize;
/* At this point we shouldn't be decompressing a block that we can stream. */
assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)));
if (isSkipFrame) {
loadedSize = MIN(toLoad, (size_t)(iend-ip));
} else {
@@ -2137,8 +2305,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
"should never happen");
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
}
ip += loadedSize;
zds->inPos += loadedSize;
if (loadedSize != 0) {
/* ip may be NULL */
ip += loadedSize;
zds->inPos += loadedSize;
}
if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
/* decode loaded input */
@@ -2148,14 +2319,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
break;
}
case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
{
size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
op += flushedSize;
op = op ? op + flushedSize : op;
zds->outStart += flushedSize;
if (flushedSize == toFlushSize) { /* flush completed */
zds->streamStage = zdss_read;
if ( (zds->outBuffSize < zds->fParams.frameContentSize)
&& (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
&& (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
(int)(zds->outBuffSize - zds->outStart),
(U32)zds->fParams.blockSizeMax);
@@ -2169,7 +2343,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
default:
assert(0); /* impossible */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
} }
/* result */
@@ -2182,8 +2356,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if ((ip==istart) && (op==ostart)) { /* no forward progress */
zds->noForwardProgress ++;
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, "");
RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, "");
assert(0);
}
} else {
@@ -2220,11 +2394,17 @@ size_t ZSTD_decompressStream_simpleArgs (
void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos)
{
ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
ZSTD_inBuffer input = { src, srcSize, *srcPos };
/* ZSTD_compress_generic() will check validity of dstPos and srcPos */
size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
*dstPos = output.pos;
*srcPos = input.pos;
return cErr;
ZSTD_outBuffer output;
ZSTD_inBuffer input;
output.dst = dst;
output.size = dstCapacity;
output.pos = *dstPos;
input.src = src;
input.size = srcSize;
input.pos = *srcPos;
{ size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
*dstPos = output.pos;
*srcPos = input.pos;
return cErr;
}
}
File diff suppressed because it is too large Load Diff
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -47,7 +47,7 @@ typedef enum {
*/
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
const void* src, size_t srcSize, const streaming_operation streaming);
/* ZSTD_buildFSETable() :
* generate FSE decoding table for one symbol (ll, ml or off)
@@ -64,5 +64,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2);
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
#endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
typedef struct {
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
} ZSTD_entropyDTables_t;
@@ -135,7 +136,7 @@ struct ZSTD_DCtx_s
const void* virtualStart; /* virtual start of previous segment if it was just before current one */
const void* dictEnd; /* end of previous segment */
size_t expected;
ZSTD_frameHeader fParams;
ZSTD_FrameHeader fParams;
U64 processedCSize;
U64 decodedSize;
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
@@ -152,7 +153,8 @@ struct ZSTD_DCtx_s
size_t litSize;
size_t rleSize;
size_t staticSize;
#if DYNAMIC_BMI2 != 0
int isFrameDecompression;
#if DYNAMIC_BMI2
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif
@@ -164,6 +166,8 @@ struct ZSTD_DCtx_s
ZSTD_dictUses_e dictUses;
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
int disableHufAsm;
int maxBlockSizeParam;
/* streaming */
ZSTD_dStreamStage streamStage;
@@ -207,11 +211,11 @@ struct ZSTD_DCtx_s
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
#if DYNAMIC_BMI2 != 0
return dctx->bmi2;
#if DYNAMIC_BMI2
return dctx->bmi2;
#else
(void)dctx;
return 0;
return 0;
#endif
}
+1 -1
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

Some files were not shown because too many files have changed in this diff Show More