Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
898 changes: 580 additions & 318 deletions contrib/seekable_format/tests/seekable_tests.c

Large diffs are not rendered by default.

138 changes: 100 additions & 38 deletions contrib/seqBench/seqBench.c
Original file line number Diff line number Diff line change
@@ -1,53 +1,115 @@
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <zstd.h>

/* seqBench entry point: reads the file named by argv[1] into memory, generates
 * sequences for it with ZSTD_generateSequences(), compresses from those
 * sequences with ZSTD_compressSequences(), decompresses the result with
 * ZSTD_decompress(), and compares it to the input with memcmp().
 *
 * NOTE(review): this span appears to be a rendered diff in which removed and
 * added lines are interleaved without +/- markers (for example `zc`, `f` and
 * `inBuf` are each declared twice, and two `return` statements end the
 * function), so it does not compile exactly as shown.
 */
int main(int argc, char *argv[]) {
ZSTD_CCtx* zc = ZSTD_createCCtx();
int ret = 0;
FILE *f = NULL;
char *inBuf = NULL;
ZSTD_Sequence *seqs = NULL;
char *outBuf = NULL;
char *validationBuf = NULL;
ZSTD_CCtx *zc = ZSTD_createCCtx();

if (argc != 2) {
printf("Usage: seqBench <file>\n"); // TODO provide the block delim option here
return 1;
}
if (!zc) {
fprintf(stderr, "ERROR: ZSTD_createCCtx failed\n");
return 1;
}

FILE *f = fopen(argv[1], "rb");
fseek(f, 0, SEEK_END);
long inBufSize = ftell(f);
fseek(f, 0, SEEK_SET);
if (argc != 2) {
fprintf(stderr, "Usage: seqBench <file>\n");
ret = 1;
goto cleanup;
}

char *inBuf = malloc(inBufSize + 1);
fread(inBuf, inBufSize, 1, f);
fclose(f);
f = fopen(argv[1], "rb");
if (!f) {
fprintf(stderr, "ERROR: Could not open %s\n", argv[1]);
ret = 1;
goto cleanup;
}
/* The file size is obtained by seeking to the end and calling ftell().
 * NOTE(review): the results of fseek()/ftell() are not checked here. */
fseek(f, 0, SEEK_END);
long inBufSize = ftell(f);
fseek(f, 0, SEEK_SET);

size_t seqsSize = ZSTD_sequenceBound(inBufSize);
ZSTD_Sequence *seqs = (ZSTD_Sequence*)malloc(seqsSize * sizeof(ZSTD_Sequence));
char *outBuf = malloc(ZSTD_compressBound(inBufSize));
inBuf = malloc(inBufSize + 1);
if (!inBuf) {
fprintf(stderr, "ERROR: malloc failed for inBuf\n");
ret = 1;
goto cleanup;
}

ZSTD_generateSequences(zc, seqs, seqsSize, inBuf, inBufSize);
ZSTD_CCtx_setParameter(zc, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
size_t outBufSize = ZSTD_compressSequences(zc, outBuf, inBufSize, seqs, seqsSize, inBuf, inBufSize);
if (ZSTD_isError(outBufSize)) {
printf("ERROR: %lu\n", outBufSize);
return 1;
}
if (fread(inBuf, 1, inBufSize, f) != (size_t)inBufSize) {
fprintf(stderr, "ERROR: fread failed to read full file\n");
ret = 1;
goto cleanup;
}
/* The stream is closed early and reset to NULL so the cleanup label does not
 * close it a second time. */
fclose(f);
f = NULL;

char *validationBuf = malloc(inBufSize);
ZSTD_decompress(validationBuf, inBufSize, outBuf, outBufSize);

if (memcmp(inBuf, validationBuf, inBufSize) == 0) {
printf("Compression and decompression were successful!\n");
} else {
printf("ERROR: input and validation buffers don't match!\n");
for (int i = 0; i < inBufSize; i++) {
if (inBuf[i] != validationBuf[i]) {
printf("First bad index: %d\n", i);
break;
}
}
size_t seqsSize = ZSTD_sequenceBound(inBufSize);
seqs = (ZSTD_Sequence *)malloc(seqsSize * sizeof(ZSTD_Sequence));
if (!seqs) {
fprintf(stderr, "ERROR: malloc failed for seqs\n");
ret = 1;
goto cleanup;
}

outBuf = malloc(ZSTD_compressBound(inBufSize));
if (!outBuf) {
fprintf(stderr, "ERROR: malloc failed for outBuf\n");
ret = 1;
goto cleanup;
}

/* NOTE(review): the return value of ZSTD_generateSequences() is discarded, and
 * seqsSize (the value from ZSTD_sequenceBound()) is what gets passed on to
 * ZSTD_compressSequences() below — confirm that is the intended count. */
ZSTD_generateSequences(zc, seqs, seqsSize, inBuf, inBufSize);
ZSTD_CCtx_setParameter(zc, ZSTD_c_blockDelimiters,
ZSTD_sf_explicitBlockDelimiters);
/* NOTE(review): outBuf was allocated with ZSTD_compressBound(inBufSize) bytes,
 * but the third argument here (presumably the destination capacity) is
 * inBufSize — confirm against the zstd API. */
size_t outBufSize = ZSTD_compressSequences(zc, outBuf, inBufSize, seqs,
seqsSize, inBuf, inBufSize);
if (ZSTD_isError(outBufSize)) {
fprintf(stderr, "ERROR: %s\n", ZSTD_getErrorName(outBufSize));
ret = 1;
goto cleanup;
}

validationBuf = malloc(inBufSize);
if (!validationBuf) {
fprintf(stderr, "ERROR: malloc failed for validationBuf\n");
ret = 1;
goto cleanup;
}
/* NOTE(review): the result of ZSTD_decompress() is not checked; a failure is
 * only detected indirectly by the memcmp() below. */
ZSTD_decompress(validationBuf, inBufSize, outBuf, outBufSize);

if (memcmp(inBuf, validationBuf, inBufSize) == 0) {
printf("Compression and decompression were successful!\n");
printf("Original size: %ld bytes\n", inBufSize);
printf("Compressed size: %zu bytes\n", outBufSize);
if (outBufSize > 0) {
printf("Ratio: %.2f\n", (double)inBufSize / outBufSize);
}
} else {
fprintf(stderr, "ERROR: input and validation buffers don't match!\n");
/* Report the first differing byte offset to help locate the mismatch. */
for (int i = 0; i < inBufSize; i++) {
if (inBuf[i] != validationBuf[i]) {
fprintf(stderr, "First bad index: %d\n", i);
break;
}
}
ret = 1;
}

/* Single exit path: releases whatever was acquired. The pointers start out as
 * NULL, and free(NULL) is a no-op, so early jumps here are safe for them. */
cleanup:
if (f)
fclose(f);
free(inBuf);
free(seqs);
free(outBuf);
free(validationBuf);
ZSTD_freeCCtx(zc);

return 0;
return ret;
}
9 changes: 7 additions & 2 deletions examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ all: simple_compression simple_decompression \
multiple_simple_compression\
dictionary_compression dictionary_decompression \
streaming_compression streaming_decompression \
multiple_streaming_compression streaming_memory_usage
multiple_streaming_compression streaming_memory_usage \
streaming_dictionary_compression

$(LIB) :
$(MAKE) -C $(LIBDIR) libzstd.a
Expand Down Expand Up @@ -53,6 +54,9 @@ streaming_decompression : $(LIB)
streaming_memory_usage.o: common.h
streaming_memory_usage : $(LIB)

streaming_dictionary_compression.o: common.h
streaming_dictionary_compression : $(LIB)


.PHONY:clean
clean:
Expand All @@ -61,7 +65,8 @@ clean:
multiple_simple_compression \
dictionary_compression dictionary_decompression \
streaming_compression streaming_decompression \
multiple_streaming_compression streaming_memory_usage
multiple_streaming_compression streaming_memory_usage \
streaming_dictionary_compression
@echo Cleaning completed

.PHONY:test
Expand Down
4 changes: 4 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,7 @@ Zstandard library : usage examples
Decompress multiple files using the same dictionary.
Result remains in memory.
Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()`

- [Streaming dictionary compression](streaming_dictionary_compression.c) :
Compress multiple files in streaming mode using the same dictionary.
Introduces usage of : `ZSTD_CCtx_loadDictionary()` and `ZSTD_compressStream2()`
138 changes: 138 additions & 0 deletions examples/streaming_dictionary_compression.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

/* This example demonstrates streaming compression with a dictionary.
* It combines the streaming approach from streaming_compression.c
* with the dictionary approach from dictionary_compression.c.
*
* This is useful when compressing many small files (e.g. database records,
* JSON objects, log entries) in a streaming fashion using a pre-trained
* dictionary for better compression ratios on small data.
*
* It uses the advanced API:
* - ZSTD_CCtx_loadDictionary() to load the dictionary
* - ZSTD_compressStream2() to stream-compress the input
*
* Usage: streaming_dictionary_compression DICT FILES...
*/

#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
#include <stdio.h> // printf
#include <stdlib.h> // free
#include <string.h> // memset, strcat, strlen
#include <zstd.h> // presumes zstd library is installed

static void compressFile_orDie(const char *fname, const char *outName,
const void *dictBuffer, size_t dictSize,
int cLevel) {
/* Open the input and output files. */
FILE *const fin = fopen_orDie(fname, "rb");
FILE *const fout = fopen_orDie(outName, "wb");

/* Create the input and output buffers.
* They may be any size, but we recommend using these functions to size them.
*/
size_t const buffInSize = ZSTD_CStreamInSize();
void *const buffIn = malloc_orDie(buffInSize);
size_t const buffOutSize = ZSTD_CStreamOutSize();
void *const buffOut = malloc_orDie(buffOutSize);

/* Create the compression context. */
ZSTD_CCtx *const cctx = ZSTD_createCCtx();
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");

/* Set compression parameters. */
CHECK_ZSTD(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel));
CHECK_ZSTD(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));

/* Load the dictionary.
* The dictionary will be used for all subsequent compressions using this
* context, until it is reset or a new dictionary is loaded.
* ZSTD_CCtx_loadDictionary() makes an internal copy of the dictionary,
* so we can free dictBuffer after this call if we wanted to.
*/
CHECK_ZSTD(ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize));

/* Stream-compress the input file. */
size_t const toRead = buffInSize;
for (;;) {
size_t read = fread_orDie(buffIn, toRead, fin);
int const lastChunk = (read < toRead);
ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
ZSTD_inBuffer input = {buffIn, read, 0};
int finished;
do {
ZSTD_outBuffer output = {buffOut, buffOutSize, 0};
size_t const remaining =
ZSTD_compressStream2(cctx, &output, &input, mode);
CHECK_ZSTD(remaining);
fwrite_orDie(buffOut, output.pos, fout);
finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
} while (!finished);
CHECK(input.pos == input.size, "Impossible: zstd only returns 0 when the "
"input is completely consumed!");
if (lastChunk) {
break;
}
}

ZSTD_freeCCtx(cctx);
fclose_orDie(fout);
fclose_orDie(fin);
free(buffIn);
free(buffOut);
}

/* Build the output file name for `filename` by appending ".zst".
 * The string is obtained from malloc_orDie(); the caller releases it with
 * free(). */
static char *createOutFilename_orDie(const char *filename) {
    static const char suffix[] = ".zst"; /* sizeof counts the trailing NUL */
    size_t const baseLen = strlen(filename);
    char *const result = (char *)malloc_orDie(baseLen + sizeof(suffix));

    memcpy(result, filename, baseLen);
    /* Copies the suffix together with its terminator. */
    memcpy(result + baseLen, suffix, sizeof(suffix));
    return result;
}

/* Program entry point.
 *
 * Expects argv[1] to name a dictionary file and argv[2..] to name the files to
 * compress. Each input FILE is compressed to "FILE.zst" at a fixed
 * compression level using the same dictionary.
 *
 * Returns 1 after printing a usage message when fewer than two arguments are
 * given, 0 otherwise. Failures inside the *_orDie helpers (from common.h) are
 * presumably fatal and do not return here.
 */
int main(int argc, const char **argv) {
    const char *const exeName = argv[0];
    int const cLevel = 3;

    /* At least one input file is required in addition to the dictionary, so
     * the usage line shows FILES as mandatory. */
    if (argc < 3) {
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s DICT FILES...\n", exeName);
        fprintf(stderr,
                "\nCompress FILES using streaming mode with a dictionary.\n");
        fprintf(stderr, "DICT is a dictionary file created with `zstd --train`.\n");
        return 1;
    }

    /* Load dictionary into memory.
     * The dictionary is loaded once and reused for all files. */
    const char *const dictName = argv[1];
    size_t dictSize;
    void *const dictBuffer = mallocAndLoadFile_orDie(dictName, &dictSize);
    printf("loading dictionary %s (%zu bytes)\n", dictName, dictSize);

    /* Compress each file with the dictionary. */
    int u;
    for (u = 2; u < argc; u++) {
        const char *const inFilename = argv[u];
        char *const outFilename = createOutFilename_orDie(inFilename);
        compressFile_orDie(inFilename, outFilename, dictBuffer, dictSize, cLevel);
        printf("%25s : compressed with dictionary -> %s\n", inFilename,
               outFilename);
        free(outFilename);
    }

    free(dictBuffer);
    /* argc - 2 has type int, so it is printed with %d (not %u). */
    printf("All %d files compressed with dictionary. \n", argc - 2);
    return 0;
}
Loading