Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions lib/dictBuilder/fastcover.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
const U32 d = parameters.d;
const U32 f = ctx->f;
const U32 dmersInK = k - d + 1;
/* Last position where a full d-byte dmer can be read without OOB. See #4499. */
const U32 hashEnd = (end >= d) ? end - d + 1 : begin;

/* Try each segment (activeSegment) and save the best (bestSegment) */
COVER_segment_t bestSegment = {0, 0, 0};
Expand All @@ -168,8 +170,12 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,

/* Slide the activeSegment through the whole epoch.
* Save the best segment in bestSegment.
*
* We need at least d bytes starting from activeSegment.end to compute
* a dmer hash, so stop d-1 bytes before the epoch end to avoid an
* out-of-bounds read. See #4499.
*/
while (activeSegment.end < end) {
while (activeSegment.end < hashEnd) {
/* Get hash value of current dmer */
const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);

Expand Down Expand Up @@ -200,7 +206,7 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
}

/* Zero out rest of segmentFreqs array */
while (activeSegment.begin < end) {
while (activeSegment.begin < hashEnd) {
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
segmentFreqs[delIndex] -= 1;
activeSegment.begin += 1;
Expand All @@ -209,7 +215,8 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
{
/* Zero the frequency of hash value of each dmer covered by the chosen segment. */
U32 pos;
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
const U32 bestEnd = (bestSegment.end <= hashEnd) ? bestSegment.end : hashEnd;
for (pos = bestSegment.begin; pos != bestEnd; ++pos) {
const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d);
freqs[i] = 0;
}
Expand Down Expand Up @@ -415,7 +422,12 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
*/
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
const U32 epochBegin = (U32)(epoch * epochs.size);
const U32 epochEnd = epochBegin + epochs.size;
/* Clamp epochEnd to the sample buffer size to avoid passing an
* out-of-bounds end position to FASTCOVER_selectSegment. See #4499. */
const U32 epochEnd = MIN(epochBegin + epochs.size,
(U32)ctx->nbTrainSamples > 0
? (U32)ctx->offsets[ctx->nbTrainSamples]
: epochBegin);
size_t segmentSize;
/* Select a segment */
COVER_segment_t segment = FASTCOVER_selectSegment(
Expand Down
23 changes: 23 additions & 0 deletions tests/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -3781,6 +3781,29 @@ static int basicUnitTests(U32 const seed, double compressibility)
if (dictID==0) goto _output_error;
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);

DISPLAYLEVEL(3, "test%3i : FASTCOVER with many small samples (issue #4499) : ", testNb++);
/* FASTCOVER_hashPtrToIndex reads d bytes from the sample buffer.
* When all samples are shorter than d, the sliding window must not
* read past the end of the buffer. Triggered a stack-buffer-overflow
* with ASAN before the fix. */
{ const char smallData[8] = {'A','B','C','D','E','F','G','H'};
const size_t smallSizes[8] = {1,1,1,1,1,1,1,1};
ZDICT_fastCover_params_t fcParams;
char *smallDict = (char *)malloc(1024);
size_t smallDictSize;
if (!smallDict) goto _output_error;
memset(&fcParams, 0, sizeof(fcParams));
/* d=8 forces hash8Ptr path; samples are only 1 byte each */
fcParams.d = 8;
fcParams.k = 16;
smallDictSize = ZDICT_trainFromBuffer_fastCover(
smallDict, 1024, smallData, smallSizes, 8, fcParams);
/* May return an error (too-small corpus) but must NOT crash */
(void)smallDictSize;
free(smallDict);
}
DISPLAYLEVEL(3, "OK \n");

ZSTD_freeCCtx(cctx);
free(dictBuffer);
free(samplesSizes);
Expand Down
Loading