Skip to content

Commit c9796a1

Browse files
committed
Add zstd decompression (RFC8478)
* README.md: Mention zstandard * configure.ac: Check for libzstd * docs/wget2.md: Document new --compression type * include/wget/wget.h: Add wget_content_encoding_zstd * libwget/decompressor.c: Implement the zstd decompressor * libwget/http_parse.c (wget_http_parse_content_encoding): Add zstd * src/options.c (print_version): Add +/-zstd for --version, (parse_compression): Check for wget_content_encoding_zstd, (options): Add zstd to help text * src/wget.c (http_create_request): Add zstd to Accept-Encoding: * tests/test-compression.c: Add test for zstd
1 parent cc22f0d commit c9796a1

File tree

9 files changed

+151
-20
lines changed

9 files changed

+151
-20
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ A non-exhaustive list of features
3131

3232
- Support for HTTP/1.1 and HTTP/2.0 protocol
3333
- [brotli](https://github.com/google/brotli) decompression support (Accept-Encoding: br)
34+
- [zstandard](https://github.com/facebook/zstd) decompression support, RFC8478 (Accept-Encoding: zstd)
3435
- HPKP - HTTP Public Key Pinning (RFC7469) with persistent database
3536
- TCP Fast Open for plain text *and* for HTTPS
3637
- TLS Session Resumption including persistent session data cache

configure.ac

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,21 @@ AS_IF([test "x$with_brotlidec" != xno], [
578578
])
579579
AM_CONDITIONAL([WITH_BROTLIDEC], [test "x$with_brotlidec" = xyes])
580580

581+
AC_ARG_WITH(zstd, AS_HELP_STRING([--without-zstd], [disable Zstandard compression support]), with_zstd=$withval, with_zstd=yes)
582+
AS_IF([test "x$with_zstd" != xno], [
583+
PKG_CHECK_MODULES([ZSTD], libzstd, [
584+
with_zstd=yes
585+
LIBS="$ZSTD_LIBS $LIBS"
586+
CFLAGS="$ZSTD_CFLAGS $CFLAGS"
587+
AC_DEFINE([WITH_ZSTD], [1], [Use zstd])
588+
], [
589+
AC_SEARCH_LIBS(ZSTD_decompressStream, zstd,
590+
[with_zstd=yes; AC_DEFINE([WITH_ZSTD], [1], [Use libzstd])],
591+
[with_zstd=no; AC_MSG_WARN(*** libzstd was not found. You will not be able to use Zstandard decompression)])
592+
])
593+
])
594+
AM_CONDITIONAL([WITH_ZSTD], [test "x$with_zstd" = xyes])
595+
581596
# Support for internationalized domain names.
582597
# IDN support in Wget2 is provided in multiple ways:
583598
# 1. libidn2 >= 0.14.0 (IDNA 2008)
@@ -766,6 +781,7 @@ AC_MSG_NOTICE([Summary of build options:
766781
BZIP2 compression: $with_bzip2
767782
LZMA compression: $with_lzma
768783
Brotli compression: $with_brotlidec
784+
Zstd compression: $with_zstd
769785
IDNA support: $IDNA_INFO
770786
PSL support: $with_libpsl
771787
HSTS support: $with_libhsts

docs/wget2.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ Go to background immediately after startup. If no output file is specified via t
354354

355355
`ResponseTime` ms between start of request and first response packet.
356356

357-
`Encoding` 0,1,2,3,4,5 mean server side compression was 'identity', 'gzip', 'deflate', 'lzma/xz', 'bzip2', 'brotli'
357+
`Encoding` 0,1,2,3,4,5,6,7 mean server side compression was 'identity', 'gzip', 'deflate', 'xz', 'lzma', 'bzip2', 'brotli', 'zstd'
358358

359359
`Verification` PGP verification status. 0,1,2,3,4 mean 'none', 'valid', 'invalid', 'bad', 'missing'.
360360

@@ -1330,7 +1330,7 @@ Go to background immediately after startup. If no output file is specified via t
13301330

13311331
### `--compression=TYPE`
13321332

1333-
If this TYPE(`identity`, `gzip`, `deflate`, `xz`, `lzma`, `br`, `bzip2` or any combination of it) is given,
1333+
If this TYPE (`identity`, `gzip`, `deflate`, `xz`, `lzma`, `br`, `bzip2`, `zstd` or any combination of them) is given,
13341334
Wget2 will set "Accept-Encoding" header accordingly. `--no-compression` means no "Accept-Encoding" header at all.
13351335
To set "Accept-Encoding" to a custom value, use `--no-compression` in combination with
13361336
`--header="Accept-Encoding: xxx"`.

include/wget/wget.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,8 @@ typedef enum {
850850
wget_content_encoding_lzma = 4,
851851
wget_content_encoding_bzip2 = 5,
852852
wget_content_encoding_brotli = 6,
853-
wget_content_encoding_max = 7
853+
wget_content_encoding_zstd = 7,
854+
wget_content_encoding_max = 8
854855
} wget_content_encoding_type_t;
855856

856857
WGETAPI G_GNUC_WGET_PURE wget_content_encoding_type_t

libwget/decompressor.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@
5454
#include <brotli/decode.h>
5555
#endif
5656

57+
#ifdef WITH_ZSTD
58+
#include <zstd.h>
59+
#endif
60+
5761
#include <wget.h>
5862
#include "private.h"
5963

@@ -77,6 +81,10 @@ struct _wget_decompressor_st {
7781
BrotliDecoderState
7882
*brotli_strm;
7983
#endif
84+
#ifdef WITH_ZSTD
85+
ZSTD_DStream
86+
*zstd_strm;
87+
#endif
8088

8189
wget_decompressor_sink_t
8290
sink; // decompressed data goes here
@@ -279,6 +287,64 @@ static void brotli_exit(wget_decompressor_t *dc)
279287
}
280288
#endif // WITH_BROTLIDEC
281289

290+
#ifdef WITH_ZSTD
291+
/*
 * Create and initialise a Zstandard streaming decompression context.
 *
 * On success the new context is stored in *strm and 0 is returned.
 * On failure an error is logged, *strm is left NULL and -1 is returned.
 */
static int zstd_init(ZSTD_DStream **strm)
{
	ZSTD_DStream *stream = ZSTD_createDStream();

	*strm = stream;
	if (!stream) {
		error_printf(_("Failed to create Zstandard decompression\n"));
		return -1;
	}

	size_t status = ZSTD_initDStream(stream);
	if (!ZSTD_isError(status))
		return 0;

	// Initialisation failed: report why, then release the half-built context.
	error_printf(_("Failed to init Zstandard decompression: %s\n"), ZSTD_getErrorName(status));
	ZSTD_freeDStream(stream);
	*strm = NULL;

	return -1;
}
308+
309+
/*
 * Decompress one chunk of Zstandard data and hand the output to the sink.
 *
 * dc     - decompressor; dc->zstd_strm is the stream context, dc->sink
 *          (optional) receives the decompressed bytes with dc->context
 * src    - compressed input bytes
 * srclen - number of bytes in src
 *
 * Returns 0 on success, -1 if libzstd reports a decompression error.
 */
static int zstd_decompress(wget_decompressor_t *dc, char *src, size_t srclen)
{
	ZSTD_DStream *strm;
	uint8_t dst[10240];

	if (!srclen) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	strm = dc->zstd_strm;

	ZSTD_inBuffer input = { .src = src, .size = srclen, .pos = 0 };

	// Keep decoding until libzstd has consumed the whole input chunk.
	while (input.pos < input.size) {
		// Fresh output window over dst for every pass.
		ZSTD_outBuffer output = { .dst = dst, .size = sizeof(dst), .pos = 0 };

		size_t rc = ZSTD_decompressStream(strm, &output , &input);
		if (ZSTD_isError(rc)) {
			// This is a decode failure, not an init failure - say so in the log.
			error_printf(_("Failed to uncompress Zstandard stream: %s\n"), ZSTD_getErrorName(rc));
			return -1;
		}

		if (dc->sink)
			dc->sink(dc->context, (char *)dst, output.pos);
	}

	return 0;
}
341+
342+
/*
 * Release the Zstandard stream context held by the decompressor.
 */
static void zstd_exit(wget_decompressor_t *dc)
{
	ZSTD_DStream *stream = dc->zstd_strm;

	ZSTD_freeDStream(stream);
}
346+
#endif // WITH_ZSTD
347+
282348
#ifdef WITH_BZIP2
283349
static int bzip2_init(bz_stream *strm)
284350
{
@@ -384,6 +450,13 @@ wget_decompressor_t *wget_decompress_open(
384450
dc->decompress = brotli_decompress;
385451
dc->exit = brotli_exit;
386452
}
453+
#endif
454+
} else if (encoding == wget_content_encoding_zstd) {
455+
#ifdef WITH_ZSTD
456+
if ((rc = zstd_init(&dc->zstd_strm)) == 0) {
457+
dc->decompress = zstd_decompress;
458+
dc->exit = zstd_exit;
459+
}
387460
#endif
388461
}
389462

@@ -444,6 +517,7 @@ static char _encoding_names[wget_content_encoding_max][9] = {
444517
[wget_content_encoding_lzma] = "lzma",
445518
[wget_content_encoding_bzip2] = "bzip2",
446519
[wget_content_encoding_brotli] = "br",
520+
[wget_content_encoding_zstd] = "zstd",
447521
};
448522

449523
wget_content_encoding_type_t wget_content_encoding_by_name(const char *name)

libwget/http_parse.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,8 @@ const char *wget_http_parse_content_encoding(const char *s, char *content_encodi
682682
*content_encoding = wget_content_encoding_lzma;
683683
else if (!wget_strcasecmp_ascii(s, "br"))
684684
*content_encoding = wget_content_encoding_brotli;
685+
else if (!wget_strcasecmp_ascii(s, "zstd"))
686+
*content_encoding = wget_content_encoding_zstd;
685687
else
686688
*content_encoding = wget_content_encoding_identity;
687689

src/options.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,12 @@ static int print_version(G_GNUC_WGET_UNUSED option_t opt, G_GNUC_WGET_UNUSED con
209209
" -brotlidec"
210210
#endif
211211

212+
#if defined WITH_ZSTD
213+
" +zstd"
214+
#else
215+
" -zstd"
216+
#endif
217+
212218
#if defined WITH_BZIP2
213219
" +bzip2"
214220
#else
@@ -1017,6 +1023,10 @@ static int parse_compression(option_t opt, const char *val, const char invert)
10171023
if (type == wget_content_encoding_brotli)
10181024
not_built = 1;
10191025
#endif
1026+
#ifndef WITH_ZSTD
1027+
if (type == wget_content_encoding_zstd)
1028+
not_built = 1;
1029+
#endif
10201030

10211031
if (not_built) {
10221032
wget_error_printf(_("Lib for type %s not built"), wget_content_encoding_to_name(type));
@@ -1242,7 +1252,7 @@ static const struct optionw options[] = {
12421252
"compression", &config.compression, parse_compression, -1, 0,
12431253
SECTION_HTTP,
12441254
{ "Customize Accept-Encoding with\n",
1245-
"identity, gzip, deflate, xz, lzma, br, bzip2\n",
1255+
"identity, gzip, deflate, xz, lzma, br, bzip2, zstd\n",
12461256
"and any combination of it\n",
12471257
"no-compression means no Accept-Encoding\n"
12481258
}

src/wget.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3568,6 +3568,10 @@ static wget_http_request_t *http_create_request(wget_iri_t *iri, JOB *job)
35683568
#ifdef WITH_BROTLIDEC
35693569
wget_buffer_strcat(&buf, buf.length ? ", br" : "br");
35703570
#endif
3571+
#ifdef WITH_ZSTD
3572+
wget_buffer_strcat(&buf, buf.length ? ", zstd" : "zstd");
3573+
#endif
3574+
35713575
if (!buf.length)
35723576
wget_buffer_strcat(&buf, "identity");
35733577

tests/test-compression.c

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#define XZ "\xfd\x37\x7a\x58\x5a\x00\x00\x04\xe6\xd6\xb4\x46\x02\x00\x21\x01\x16\x00\x00\x00\x74\x2f\xe5\xa3\x01\x00\x00\x78\x00\x00\x00\x00\x45\xae\xef\x83\xf8\xee\x16\x0a\x00\x01\x19\x01\xa5\x2c\x81\xcc\x1f\xb6\xf3\x7d\x01\x00\x00\x00\x00\x04\x59\x5a" // xz
3131
#define LZMA "\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x3c\x41\xfb\xff\xff\xff\xe0\x00\x00\x00" // lzma
3232
#define BR "\x21\x00\x00\x04\x78\x03" // br
33+
#define ZSTD "\x28\xb5\x2f\xfd\x24\x01\x09\x00\x00\x78\x23\x11\x04\x83"
3334

3435
typedef struct {
3536
const char* body;
@@ -38,7 +39,7 @@ typedef struct {
3839
bool with_lib;
3940
} compression_test_t;
4041

41-
#define KNOWN_TYPES 6
42+
#define KNOWN_TYPES countof(compressions)
4243
#define BUF_SIZE 40
4344

4445
int main(void)
@@ -102,6 +103,16 @@ int main(void)
102103
true,
103104
#else
104105
false,
106+
#endif
107+
},
108+
{ .body = ZSTD,
109+
.body_len = sizeof(ZSTD) - 1,
110+
.type = "zstd",
111+
.with_lib =
112+
#ifdef WITH_ZSTD
113+
true,
114+
#else
115+
false,
105116
#endif
106117
},
107118
};
@@ -179,6 +190,18 @@ int main(void)
179190
"Accept-Encoding: br"
180191
}
181192
},
193+
{ .name = "/zstd.html",
194+
.code = "200 Dontcare",
195+
.body = compressions[6].body,
196+
.body_len = compressions[6].body_len,
197+
.headers = {
198+
"Content-Type: text/html",
199+
"Content-Encoding: zstd",
200+
},
201+
.expected_req_headers = {
202+
"Accept-Encoding: zstd"
203+
}
204+
},
182205
{ .name = "/identity.html",
183206
.code = "200 Dontcare",
184207
.body = uncompressed_body,
@@ -254,32 +277,32 @@ int main(void)
254277
wget_test(
255278
// WGET_TEST_KEEP_TMPFILES, 1,
256279
WGET_TEST_OPTIONS, "--compression=none",
257-
WGET_TEST_REQUEST_URL, urls[6].name + 1,
280+
WGET_TEST_REQUEST_URL, urls[7].name + 1,
258281
WGET_TEST_EXPECTED_ERROR_CODE, 0,
259282
WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
260-
{ urls[6].name + 1, uncompressed_body },
283+
{ urls[7].name + 1, uncompressed_body },
261284
{ NULL } },
262285
0);
263286

264287
// test identity
265288
wget_test(
266289
// WGET_TEST_KEEP_TMPFILES, 1,
267290
WGET_TEST_OPTIONS, "--compression=identity",
268-
WGET_TEST_REQUEST_URL, urls[6].name + 1,
291+
WGET_TEST_REQUEST_URL, urls[7].name + 1,
269292
WGET_TEST_EXPECTED_ERROR_CODE, 0,
270293
WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
271-
{ urls[6].name + 1, uncompressed_body },
294+
{ urls[7].name + 1, uncompressed_body },
272295
{ NULL } },
273296
0);
274297

275298
// test no "Accept-Encoding"
276299
wget_test(
277300
// WGET_TEST_KEEP_TMPFILES, 1,
278301
WGET_TEST_OPTIONS, "--no-compression",
279-
WGET_TEST_REQUEST_URL, urls[7].name + 1,
302+
WGET_TEST_REQUEST_URL, urls[8].name + 1,
280303
WGET_TEST_EXPECTED_ERROR_CODE, 0,
281304
WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
282-
{ urls[7].name + 1, uncompressed_body },
305+
{ urls[8].name + 1, uncompressed_body },
283306
{ NULL } },
284307
0);
285308

@@ -295,44 +318,44 @@ int main(void)
295318
wget_test(
296319
// WGET_TEST_KEEP_TMPFILES, 1,
297320
WGET_TEST_OPTIONS, "--no-compression --header=\"Accept-Encoding: identity\"",
298-
WGET_TEST_REQUEST_URL, urls[6].name + 1,
321+
WGET_TEST_REQUEST_URL, urls[7].name + 1,
299322
WGET_TEST_EXPECTED_ERROR_CODE, 0,
300323
WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
301-
{ urls[6].name + 1, uncompressed_body },
324+
{ urls[7].name + 1, uncompressed_body },
302325
{ NULL } },
303326
0);
304327

305328
// test --compression override
306329
wget_test(
307330
// WGET_TEST_KEEP_TMPFILES, 1,
308331
WGET_TEST_OPTIONS, "--no-compression --compression=identity",
309-
WGET_TEST_REQUEST_URL, urls[6].name + 1,
332+
WGET_TEST_REQUEST_URL, urls[7].name + 1,
310333
WGET_TEST_EXPECTED_ERROR_CODE, 0,
311334
WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
312-
{ urls[6].name + 1, uncompressed_body },
335+
{ urls[7].name + 1, uncompressed_body },
313336
{ NULL } },
314337
0);
315338

316339
// test --no-compression override
317340
wget_test(
318341
// WGET_TEST_KEEP_TMPFILES, 1,
319342
WGET_TEST_OPTIONS, "--compression=identity --no-compression",
320-
WGET_TEST_REQUEST_URL, urls[7].name + 1,
343+
WGET_TEST_REQUEST_URL, urls[8].name + 1,
321344
WGET_TEST_EXPECTED_ERROR_CODE, 0,
322345
WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
323-
{ urls[7].name + 1, uncompressed_body },
346+
{ urls[8].name + 1, uncompressed_body },
324347
{ NULL } },
325348
0);
326349

327350
// test combination
328351
wget_test(
329352
// WGET_TEST_KEEP_TMPFILES, 1,
330353
WGET_TEST_OPTIONS, "--compression=identity,br,lzma,gzip",
331-
WGET_TEST_REQUEST_URL, urls[8].name + 1,
354+
WGET_TEST_REQUEST_URL, urls[9].name + 1,
332355
#if defined WITH_BROTLIDEC && defined WITH_LZMA && defined WITH_ZLIB
333356
WGET_TEST_EXPECTED_ERROR_CODE, 0,
334357
WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
335-
{ urls[8].name + 1, uncompressed_body },
358+
{ urls[9].name + 1, uncompressed_body },
336359
{ NULL } },
337360
#else
338361
WGET_TEST_EXPECTED_ERROR_CODE, 2,
@@ -358,7 +381,7 @@ int main(void)
358381
wget_test(
359382
// WGET_TEST_KEEP_TMPFILES, 1,
360383
WGET_TEST_OPTIONS, "--compression=identity,identity",
361-
WGET_TEST_REQUEST_URL, urls[8].name + 1,
384+
WGET_TEST_REQUEST_URL, urls[9].name + 1,
362385
WGET_TEST_EXPECTED_ERROR_CODE, 2,
363386
0);
364387

0 commit comments

Comments
 (0)