xref: /freebsd/sys/contrib/zstd/lib/dictBuilder/cover.c (revision 0f743729abbfc5c9d78a713f72241a4d4bd601ec)
10c16b537SWarner Losh /*
20c16b537SWarner Losh  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
30c16b537SWarner Losh  * All rights reserved.
40c16b537SWarner Losh  *
50c16b537SWarner Losh  * This source code is licensed under both the BSD-style license (found in the
60c16b537SWarner Losh  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
70c16b537SWarner Losh  * in the COPYING file in the root directory of this source tree).
80c16b537SWarner Losh  * You may select, at your option, one of the above-listed licenses.
90c16b537SWarner Losh  */
100c16b537SWarner Losh 
110c16b537SWarner Losh /* *****************************************************************************
120c16b537SWarner Losh  * Constructs a dictionary using a heuristic based on the following paper:
130c16b537SWarner Losh  *
140c16b537SWarner Losh  * Liao, Petri, Moffat, Wirth
150c16b537SWarner Losh  * Effective Construction of Relative Lempel-Ziv Dictionaries
160c16b537SWarner Losh  * Published in WWW 2016.
170c16b537SWarner Losh  *
180c16b537SWarner Losh  * Adapted from code originally written by @ot (Giuseppe Ottaviano).
190c16b537SWarner Losh  ******************************************************************************/
200c16b537SWarner Losh 
210c16b537SWarner Losh /*-*************************************
220c16b537SWarner Losh *  Dependencies
230c16b537SWarner Losh ***************************************/
240c16b537SWarner Losh #include <stdio.h>  /* fprintf */
250c16b537SWarner Losh #include <stdlib.h> /* malloc, free, qsort */
260c16b537SWarner Losh #include <string.h> /* memset */
270c16b537SWarner Losh #include <time.h>   /* clock */
280c16b537SWarner Losh 
290c16b537SWarner Losh #include "mem.h" /* read */
300c16b537SWarner Losh #include "pool.h"
310c16b537SWarner Losh #include "threading.h"
32*0f743729SConrad Meyer #include "cover.h"
330c16b537SWarner Losh #include "zstd_internal.h" /* includes zstd.h */
340c16b537SWarner Losh #ifndef ZDICT_STATIC_LINKING_ONLY
350c16b537SWarner Losh #define ZDICT_STATIC_LINKING_ONLY
360c16b537SWarner Losh #endif
370c16b537SWarner Losh #include "zdict.h"
380c16b537SWarner Losh 
390c16b537SWarner Losh /*-*************************************
400c16b537SWarner Losh *  Constants
410c16b537SWarner Losh ***************************************/
420c16b537SWarner Losh #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
43*0f743729SConrad Meyer #define DEFAULT_SPLITPOINT 1.0
440c16b537SWarner Losh 
450c16b537SWarner Losh /*-*************************************
460c16b537SWarner Losh *  Console display
470c16b537SWarner Losh ***************************************/
480c16b537SWarner Losh static int g_displayLevel = 2;
490c16b537SWarner Losh #define DISPLAY(...)                                                           \
500c16b537SWarner Losh   {                                                                            \
510c16b537SWarner Losh     fprintf(stderr, __VA_ARGS__);                                              \
520c16b537SWarner Losh     fflush(stderr);                                                            \
530c16b537SWarner Losh   }
540c16b537SWarner Losh #define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
550c16b537SWarner Losh   if (displayLevel >= l) {                                                     \
560c16b537SWarner Losh     DISPLAY(__VA_ARGS__);                                                      \
570c16b537SWarner Losh   } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
580c16b537SWarner Losh #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
590c16b537SWarner Losh 
600c16b537SWarner Losh #define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
610c16b537SWarner Losh   if (displayLevel >= l) {                                                     \
620c16b537SWarner Losh     if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
630c16b537SWarner Losh       g_time = clock();                                                        \
640c16b537SWarner Losh       DISPLAY(__VA_ARGS__);                                                    \
650c16b537SWarner Losh     }                                                                          \
660c16b537SWarner Losh   }
670c16b537SWarner Losh #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
680c16b537SWarner Losh static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
690c16b537SWarner Losh static clock_t g_time = 0;
700c16b537SWarner Losh 
710c16b537SWarner Losh /*-*************************************
720c16b537SWarner Losh * Hash table
730c16b537SWarner Losh ***************************************
740c16b537SWarner Losh * A small specialized hash map for storing activeDmers.
750c16b537SWarner Losh * The map does not resize, so if it becomes full it will loop forever.
760c16b537SWarner Losh * Thus, the map must be large enough to store every value.
770c16b537SWarner Losh * The map implements linear probing and keeps its load less than 0.5.
780c16b537SWarner Losh */
790c16b537SWarner Losh 
800c16b537SWarner Losh #define MAP_EMPTY_VALUE ((U32)-1)
810c16b537SWarner Losh typedef struct COVER_map_pair_t_s {
820c16b537SWarner Losh   U32 key;
830c16b537SWarner Losh   U32 value;
840c16b537SWarner Losh } COVER_map_pair_t;
850c16b537SWarner Losh 
860c16b537SWarner Losh typedef struct COVER_map_s {
870c16b537SWarner Losh   COVER_map_pair_t *data;
880c16b537SWarner Losh   U32 sizeLog;
890c16b537SWarner Losh   U32 size;
900c16b537SWarner Losh   U32 sizeMask;
910c16b537SWarner Losh } COVER_map_t;
920c16b537SWarner Losh 
930c16b537SWarner Losh /**
940c16b537SWarner Losh  * Clear the map.
950c16b537SWarner Losh  */
960c16b537SWarner Losh static void COVER_map_clear(COVER_map_t *map) {
970c16b537SWarner Losh   memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
980c16b537SWarner Losh }
990c16b537SWarner Losh 
1000c16b537SWarner Losh /**
1010c16b537SWarner Losh  * Initializes a map of the given size.
1020c16b537SWarner Losh  * Returns 1 on success and 0 on failure.
1030c16b537SWarner Losh  * The map must be destroyed with COVER_map_destroy().
1040c16b537SWarner Losh  * The map is only guaranteed to be large enough to hold size elements.
1050c16b537SWarner Losh  */
1060c16b537SWarner Losh static int COVER_map_init(COVER_map_t *map, U32 size) {
1070c16b537SWarner Losh   map->sizeLog = ZSTD_highbit32(size) + 2;
1080c16b537SWarner Losh   map->size = (U32)1 << map->sizeLog;
1090c16b537SWarner Losh   map->sizeMask = map->size - 1;
1100c16b537SWarner Losh   map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
1110c16b537SWarner Losh   if (!map->data) {
1120c16b537SWarner Losh     map->sizeLog = 0;
1130c16b537SWarner Losh     map->size = 0;
1140c16b537SWarner Losh     return 0;
1150c16b537SWarner Losh   }
1160c16b537SWarner Losh   COVER_map_clear(map);
1170c16b537SWarner Losh   return 1;
1180c16b537SWarner Losh }
1190c16b537SWarner Losh 
1200c16b537SWarner Losh /**
1210c16b537SWarner Losh  * Internal hash function
1220c16b537SWarner Losh  */
1230c16b537SWarner Losh static const U32 prime4bytes = 2654435761U;
1240c16b537SWarner Losh static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
1250c16b537SWarner Losh   return (key * prime4bytes) >> (32 - map->sizeLog);
1260c16b537SWarner Losh }
1270c16b537SWarner Losh 
1280c16b537SWarner Losh /**
1290c16b537SWarner Losh  * Helper function that returns the index that a key should be placed into.
1300c16b537SWarner Losh  */
1310c16b537SWarner Losh static U32 COVER_map_index(COVER_map_t *map, U32 key) {
1320c16b537SWarner Losh   const U32 hash = COVER_map_hash(map, key);
1330c16b537SWarner Losh   U32 i;
1340c16b537SWarner Losh   for (i = hash;; i = (i + 1) & map->sizeMask) {
1350c16b537SWarner Losh     COVER_map_pair_t *pos = &map->data[i];
1360c16b537SWarner Losh     if (pos->value == MAP_EMPTY_VALUE) {
1370c16b537SWarner Losh       return i;
1380c16b537SWarner Losh     }
1390c16b537SWarner Losh     if (pos->key == key) {
1400c16b537SWarner Losh       return i;
1410c16b537SWarner Losh     }
1420c16b537SWarner Losh   }
1430c16b537SWarner Losh }
1440c16b537SWarner Losh 
1450c16b537SWarner Losh /**
1460c16b537SWarner Losh  * Returns the pointer to the value for key.
1470c16b537SWarner Losh  * If key is not in the map, it is inserted and the value is set to 0.
1480c16b537SWarner Losh  * The map must not be full.
1490c16b537SWarner Losh  */
1500c16b537SWarner Losh static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
1510c16b537SWarner Losh   COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
1520c16b537SWarner Losh   if (pos->value == MAP_EMPTY_VALUE) {
1530c16b537SWarner Losh     pos->key = key;
1540c16b537SWarner Losh     pos->value = 0;
1550c16b537SWarner Losh   }
1560c16b537SWarner Losh   return &pos->value;
1570c16b537SWarner Losh }
1580c16b537SWarner Losh 
1590c16b537SWarner Losh /**
1600c16b537SWarner Losh  * Deletes key from the map if present.
1610c16b537SWarner Losh  */
1620c16b537SWarner Losh static void COVER_map_remove(COVER_map_t *map, U32 key) {
1630c16b537SWarner Losh   U32 i = COVER_map_index(map, key);
1640c16b537SWarner Losh   COVER_map_pair_t *del = &map->data[i];
1650c16b537SWarner Losh   U32 shift = 1;
1660c16b537SWarner Losh   if (del->value == MAP_EMPTY_VALUE) {
1670c16b537SWarner Losh     return;
1680c16b537SWarner Losh   }
1690c16b537SWarner Losh   for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
1700c16b537SWarner Losh     COVER_map_pair_t *const pos = &map->data[i];
1710c16b537SWarner Losh     /* If the position is empty we are done */
1720c16b537SWarner Losh     if (pos->value == MAP_EMPTY_VALUE) {
1730c16b537SWarner Losh       del->value = MAP_EMPTY_VALUE;
1740c16b537SWarner Losh       return;
1750c16b537SWarner Losh     }
1760c16b537SWarner Losh     /* If pos can be moved to del do so */
1770c16b537SWarner Losh     if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
1780c16b537SWarner Losh       del->key = pos->key;
1790c16b537SWarner Losh       del->value = pos->value;
1800c16b537SWarner Losh       del = pos;
1810c16b537SWarner Losh       shift = 1;
1820c16b537SWarner Losh     } else {
1830c16b537SWarner Losh       ++shift;
1840c16b537SWarner Losh     }
1850c16b537SWarner Losh   }
1860c16b537SWarner Losh }
1870c16b537SWarner Losh 
1880c16b537SWarner Losh /**
189*0f743729SConrad Meyer  * Destroys a map that is inited with COVER_map_init().
1900c16b537SWarner Losh  */
1910c16b537SWarner Losh static void COVER_map_destroy(COVER_map_t *map) {
1920c16b537SWarner Losh   if (map->data) {
1930c16b537SWarner Losh     free(map->data);
1940c16b537SWarner Losh   }
1950c16b537SWarner Losh   map->data = NULL;
1960c16b537SWarner Losh   map->size = 0;
1970c16b537SWarner Losh }
1980c16b537SWarner Losh 
1990c16b537SWarner Losh /*-*************************************
2000c16b537SWarner Losh * Context
2010c16b537SWarner Losh ***************************************/
2020c16b537SWarner Losh 
2030c16b537SWarner Losh typedef struct {
2040c16b537SWarner Losh   const BYTE *samples;
2050c16b537SWarner Losh   size_t *offsets;
2060c16b537SWarner Losh   const size_t *samplesSizes;
2070c16b537SWarner Losh   size_t nbSamples;
208*0f743729SConrad Meyer   size_t nbTrainSamples;
209*0f743729SConrad Meyer   size_t nbTestSamples;
2100c16b537SWarner Losh   U32 *suffix;
2110c16b537SWarner Losh   size_t suffixSize;
2120c16b537SWarner Losh   U32 *freqs;
2130c16b537SWarner Losh   U32 *dmerAt;
2140c16b537SWarner Losh   unsigned d;
2150c16b537SWarner Losh } COVER_ctx_t;
2160c16b537SWarner Losh 
2170c16b537SWarner Losh /* We need a global context for qsort... */
2180c16b537SWarner Losh static COVER_ctx_t *g_ctx = NULL;
2190c16b537SWarner Losh 
2200c16b537SWarner Losh /*-*************************************
2210c16b537SWarner Losh *  Helper functions
2220c16b537SWarner Losh ***************************************/
2230c16b537SWarner Losh 
2240c16b537SWarner Losh /**
2250c16b537SWarner Losh  * Returns the sum of the sample sizes.
2260c16b537SWarner Losh  */
227*0f743729SConrad Meyer size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
2280c16b537SWarner Losh   size_t sum = 0;
229*0f743729SConrad Meyer   unsigned i;
2300c16b537SWarner Losh   for (i = 0; i < nbSamples; ++i) {
2310c16b537SWarner Losh     sum += samplesSizes[i];
2320c16b537SWarner Losh   }
2330c16b537SWarner Losh   return sum;
2340c16b537SWarner Losh }
2350c16b537SWarner Losh 
2360c16b537SWarner Losh /**
2370c16b537SWarner Losh  * Returns -1 if the dmer at lp is less than the dmer at rp.
2380c16b537SWarner Losh  * Return 0 if the dmers at lp and rp are equal.
2390c16b537SWarner Losh  * Returns 1 if the dmer at lp is greater than the dmer at rp.
2400c16b537SWarner Losh  */
2410c16b537SWarner Losh static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
2420c16b537SWarner Losh   U32 const lhs = *(U32 const *)lp;
2430c16b537SWarner Losh   U32 const rhs = *(U32 const *)rp;
2440c16b537SWarner Losh   return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
2450c16b537SWarner Losh }
2460c16b537SWarner Losh /**
2470c16b537SWarner Losh  * Faster version for d <= 8.
2480c16b537SWarner Losh  */
2490c16b537SWarner Losh static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
2500c16b537SWarner Losh   U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
2510c16b537SWarner Losh   U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
2520c16b537SWarner Losh   U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
2530c16b537SWarner Losh   if (lhs < rhs) {
2540c16b537SWarner Losh     return -1;
2550c16b537SWarner Losh   }
2560c16b537SWarner Losh   return (lhs > rhs);
2570c16b537SWarner Losh }
2580c16b537SWarner Losh 
2590c16b537SWarner Losh /**
2600c16b537SWarner Losh  * Same as COVER_cmp() except ties are broken by pointer value
2610c16b537SWarner Losh  * NOTE: g_ctx must be set to call this function.  A global is required because
2620c16b537SWarner Losh  * qsort doesn't take an opaque pointer.
2630c16b537SWarner Losh  */
2640c16b537SWarner Losh static int COVER_strict_cmp(const void *lp, const void *rp) {
2650c16b537SWarner Losh   int result = COVER_cmp(g_ctx, lp, rp);
2660c16b537SWarner Losh   if (result == 0) {
2670c16b537SWarner Losh     result = lp < rp ? -1 : 1;
2680c16b537SWarner Losh   }
2690c16b537SWarner Losh   return result;
2700c16b537SWarner Losh }
2710c16b537SWarner Losh /**
2720c16b537SWarner Losh  * Faster version for d <= 8.
2730c16b537SWarner Losh  */
2740c16b537SWarner Losh static int COVER_strict_cmp8(const void *lp, const void *rp) {
2750c16b537SWarner Losh   int result = COVER_cmp8(g_ctx, lp, rp);
2760c16b537SWarner Losh   if (result == 0) {
2770c16b537SWarner Losh     result = lp < rp ? -1 : 1;
2780c16b537SWarner Losh   }
2790c16b537SWarner Losh   return result;
2800c16b537SWarner Losh }
2810c16b537SWarner Losh 
2820c16b537SWarner Losh /**
2830c16b537SWarner Losh  * Returns the first pointer in [first, last) whose element does not compare
2840c16b537SWarner Losh  * less than value.  If no such element exists it returns last.
2850c16b537SWarner Losh  */
2860c16b537SWarner Losh static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
2870c16b537SWarner Losh                                        size_t value) {
2880c16b537SWarner Losh   size_t count = last - first;
2890c16b537SWarner Losh   while (count != 0) {
2900c16b537SWarner Losh     size_t step = count / 2;
2910c16b537SWarner Losh     const size_t *ptr = first;
2920c16b537SWarner Losh     ptr += step;
2930c16b537SWarner Losh     if (*ptr < value) {
2940c16b537SWarner Losh       first = ++ptr;
2950c16b537SWarner Losh       count -= step + 1;
2960c16b537SWarner Losh     } else {
2970c16b537SWarner Losh       count = step;
2980c16b537SWarner Losh     }
2990c16b537SWarner Losh   }
3000c16b537SWarner Losh   return first;
3010c16b537SWarner Losh }
3020c16b537SWarner Losh 
3030c16b537SWarner Losh /**
3040c16b537SWarner Losh  * Generic groupBy function.
3050c16b537SWarner Losh  * Groups an array sorted by cmp into groups with equivalent values.
3060c16b537SWarner Losh  * Calls grp for each group.
3070c16b537SWarner Losh  */
3080c16b537SWarner Losh static void
3090c16b537SWarner Losh COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
3100c16b537SWarner Losh               int (*cmp)(COVER_ctx_t *, const void *, const void *),
3110c16b537SWarner Losh               void (*grp)(COVER_ctx_t *, const void *, const void *)) {
3120c16b537SWarner Losh   const BYTE *ptr = (const BYTE *)data;
3130c16b537SWarner Losh   size_t num = 0;
3140c16b537SWarner Losh   while (num < count) {
3150c16b537SWarner Losh     const BYTE *grpEnd = ptr + size;
3160c16b537SWarner Losh     ++num;
3170c16b537SWarner Losh     while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
3180c16b537SWarner Losh       grpEnd += size;
3190c16b537SWarner Losh       ++num;
3200c16b537SWarner Losh     }
3210c16b537SWarner Losh     grp(ctx, ptr, grpEnd);
3220c16b537SWarner Losh     ptr = grpEnd;
3230c16b537SWarner Losh   }
3240c16b537SWarner Losh }
3250c16b537SWarner Losh 
3260c16b537SWarner Losh /*-*************************************
3270c16b537SWarner Losh *  Cover functions
3280c16b537SWarner Losh ***************************************/
3290c16b537SWarner Losh 
3300c16b537SWarner Losh /**
3310c16b537SWarner Losh  * Called on each group of positions with the same dmer.
3320c16b537SWarner Losh  * Counts the frequency of each dmer and saves it in the suffix array.
3330c16b537SWarner Losh  * Fills `ctx->dmerAt`.
3340c16b537SWarner Losh  */
3350c16b537SWarner Losh static void COVER_group(COVER_ctx_t *ctx, const void *group,
3360c16b537SWarner Losh                         const void *groupEnd) {
3370c16b537SWarner Losh   /* The group consists of all the positions with the same first d bytes. */
3380c16b537SWarner Losh   const U32 *grpPtr = (const U32 *)group;
3390c16b537SWarner Losh   const U32 *grpEnd = (const U32 *)groupEnd;
3400c16b537SWarner Losh   /* The dmerId is how we will reference this dmer.
3410c16b537SWarner Losh    * This allows us to map the whole dmer space to a much smaller space, the
3420c16b537SWarner Losh    * size of the suffix array.
3430c16b537SWarner Losh    */
3440c16b537SWarner Losh   const U32 dmerId = (U32)(grpPtr - ctx->suffix);
3450c16b537SWarner Losh   /* Count the number of samples this dmer shows up in */
3460c16b537SWarner Losh   U32 freq = 0;
3470c16b537SWarner Losh   /* Details */
3480c16b537SWarner Losh   const size_t *curOffsetPtr = ctx->offsets;
3490c16b537SWarner Losh   const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
3500c16b537SWarner Losh   /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
3510c16b537SWarner Losh    * different sample than the last.
3520c16b537SWarner Losh    */
3530c16b537SWarner Losh   size_t curSampleEnd = ctx->offsets[0];
3540c16b537SWarner Losh   for (; grpPtr != grpEnd; ++grpPtr) {
3550c16b537SWarner Losh     /* Save the dmerId for this position so we can get back to it. */
3560c16b537SWarner Losh     ctx->dmerAt[*grpPtr] = dmerId;
3570c16b537SWarner Losh     /* Dictionaries only help for the first reference to the dmer.
3580c16b537SWarner Losh      * After that zstd can reference the match from the previous reference.
3590c16b537SWarner Losh      * So only count each dmer once for each sample it is in.
3600c16b537SWarner Losh      */
3610c16b537SWarner Losh     if (*grpPtr < curSampleEnd) {
3620c16b537SWarner Losh       continue;
3630c16b537SWarner Losh     }
3640c16b537SWarner Losh     freq += 1;
3650c16b537SWarner Losh     /* Binary search to find the end of the sample *grpPtr is in.
3660c16b537SWarner Losh      * In the common case that grpPtr + 1 == grpEnd we can skip the binary
3670c16b537SWarner Losh      * search because the loop is over.
3680c16b537SWarner Losh      */
3690c16b537SWarner Losh     if (grpPtr + 1 != grpEnd) {
3700c16b537SWarner Losh       const size_t *sampleEndPtr =
3710c16b537SWarner Losh           COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
3720c16b537SWarner Losh       curSampleEnd = *sampleEndPtr;
3730c16b537SWarner Losh       curOffsetPtr = sampleEndPtr + 1;
3740c16b537SWarner Losh     }
3750c16b537SWarner Losh   }
3760c16b537SWarner Losh   /* At this point we are never going to look at this segment of the suffix
3770c16b537SWarner Losh    * array again.  We take advantage of this fact to save memory.
3780c16b537SWarner Losh    * We store the frequency of the dmer in the first position of the group,
3790c16b537SWarner Losh    * which is dmerId.
3800c16b537SWarner Losh    */
3810c16b537SWarner Losh   ctx->suffix[dmerId] = freq;
3820c16b537SWarner Losh }
3830c16b537SWarner Losh 
3840c16b537SWarner Losh 
3850c16b537SWarner Losh /**
3860c16b537SWarner Losh  * Selects the best segment in an epoch.
3870c16b537SWarner Losh  * Segments of are scored according to the function:
3880c16b537SWarner Losh  *
3890c16b537SWarner Losh  * Let F(d) be the frequency of dmer d.
3900c16b537SWarner Losh  * Let S_i be the dmer at position i of segment S which has length k.
3910c16b537SWarner Losh  *
3920c16b537SWarner Losh  *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
3930c16b537SWarner Losh  *
3940c16b537SWarner Losh  * Once the dmer d is in the dictionay we set F(d) = 0.
3950c16b537SWarner Losh  */
3960c16b537SWarner Losh static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
3970c16b537SWarner Losh                                            COVER_map_t *activeDmers, U32 begin,
3980c16b537SWarner Losh                                            U32 end,
3990c16b537SWarner Losh                                            ZDICT_cover_params_t parameters) {
4000c16b537SWarner Losh   /* Constants */
4010c16b537SWarner Losh   const U32 k = parameters.k;
4020c16b537SWarner Losh   const U32 d = parameters.d;
4030c16b537SWarner Losh   const U32 dmersInK = k - d + 1;
4040c16b537SWarner Losh   /* Try each segment (activeSegment) and save the best (bestSegment) */
4050c16b537SWarner Losh   COVER_segment_t bestSegment = {0, 0, 0};
4060c16b537SWarner Losh   COVER_segment_t activeSegment;
4070c16b537SWarner Losh   /* Reset the activeDmers in the segment */
4080c16b537SWarner Losh   COVER_map_clear(activeDmers);
4090c16b537SWarner Losh   /* The activeSegment starts at the beginning of the epoch. */
4100c16b537SWarner Losh   activeSegment.begin = begin;
4110c16b537SWarner Losh   activeSegment.end = begin;
4120c16b537SWarner Losh   activeSegment.score = 0;
4130c16b537SWarner Losh   /* Slide the activeSegment through the whole epoch.
4140c16b537SWarner Losh    * Save the best segment in bestSegment.
4150c16b537SWarner Losh    */
4160c16b537SWarner Losh   while (activeSegment.end < end) {
4170c16b537SWarner Losh     /* The dmerId for the dmer at the next position */
4180c16b537SWarner Losh     U32 newDmer = ctx->dmerAt[activeSegment.end];
4190c16b537SWarner Losh     /* The entry in activeDmers for this dmerId */
4200c16b537SWarner Losh     U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
4210c16b537SWarner Losh     /* If the dmer isn't already present in the segment add its score. */
4220c16b537SWarner Losh     if (*newDmerOcc == 0) {
4230c16b537SWarner Losh       /* The paper suggest using the L-0.5 norm, but experiments show that it
4240c16b537SWarner Losh        * doesn't help.
4250c16b537SWarner Losh        */
4260c16b537SWarner Losh       activeSegment.score += freqs[newDmer];
4270c16b537SWarner Losh     }
4280c16b537SWarner Losh     /* Add the dmer to the segment */
4290c16b537SWarner Losh     activeSegment.end += 1;
4300c16b537SWarner Losh     *newDmerOcc += 1;
4310c16b537SWarner Losh 
4320c16b537SWarner Losh     /* If the window is now too large, drop the first position */
4330c16b537SWarner Losh     if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
4340c16b537SWarner Losh       U32 delDmer = ctx->dmerAt[activeSegment.begin];
4350c16b537SWarner Losh       U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
4360c16b537SWarner Losh       activeSegment.begin += 1;
4370c16b537SWarner Losh       *delDmerOcc -= 1;
4380c16b537SWarner Losh       /* If this is the last occurence of the dmer, subtract its score */
4390c16b537SWarner Losh       if (*delDmerOcc == 0) {
4400c16b537SWarner Losh         COVER_map_remove(activeDmers, delDmer);
4410c16b537SWarner Losh         activeSegment.score -= freqs[delDmer];
4420c16b537SWarner Losh       }
4430c16b537SWarner Losh     }
4440c16b537SWarner Losh 
4450c16b537SWarner Losh     /* If this segment is the best so far save it */
4460c16b537SWarner Losh     if (activeSegment.score > bestSegment.score) {
4470c16b537SWarner Losh       bestSegment = activeSegment;
4480c16b537SWarner Losh     }
4490c16b537SWarner Losh   }
4500c16b537SWarner Losh   {
4510c16b537SWarner Losh     /* Trim off the zero frequency head and tail from the segment. */
4520c16b537SWarner Losh     U32 newBegin = bestSegment.end;
4530c16b537SWarner Losh     U32 newEnd = bestSegment.begin;
4540c16b537SWarner Losh     U32 pos;
4550c16b537SWarner Losh     for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
4560c16b537SWarner Losh       U32 freq = freqs[ctx->dmerAt[pos]];
4570c16b537SWarner Losh       if (freq != 0) {
4580c16b537SWarner Losh         newBegin = MIN(newBegin, pos);
4590c16b537SWarner Losh         newEnd = pos + 1;
4600c16b537SWarner Losh       }
4610c16b537SWarner Losh     }
4620c16b537SWarner Losh     bestSegment.begin = newBegin;
4630c16b537SWarner Losh     bestSegment.end = newEnd;
4640c16b537SWarner Losh   }
4650c16b537SWarner Losh   {
4660c16b537SWarner Losh     /* Zero out the frequency of each dmer covered by the chosen segment. */
4670c16b537SWarner Losh     U32 pos;
4680c16b537SWarner Losh     for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
4690c16b537SWarner Losh       freqs[ctx->dmerAt[pos]] = 0;
4700c16b537SWarner Losh     }
4710c16b537SWarner Losh   }
4720c16b537SWarner Losh   return bestSegment;
4730c16b537SWarner Losh }
4740c16b537SWarner Losh 
4750c16b537SWarner Losh /**
4760c16b537SWarner Losh  * Check the validity of the parameters.
4770c16b537SWarner Losh  * Returns non-zero if the parameters are valid and 0 otherwise.
4780c16b537SWarner Losh  */
4790c16b537SWarner Losh static int COVER_checkParameters(ZDICT_cover_params_t parameters,
4800c16b537SWarner Losh                                  size_t maxDictSize) {
4810c16b537SWarner Losh   /* k and d are required parameters */
4820c16b537SWarner Losh   if (parameters.d == 0 || parameters.k == 0) {
4830c16b537SWarner Losh     return 0;
4840c16b537SWarner Losh   }
4850c16b537SWarner Losh   /* k <= maxDictSize */
4860c16b537SWarner Losh   if (parameters.k > maxDictSize) {
4870c16b537SWarner Losh     return 0;
4880c16b537SWarner Losh   }
4890c16b537SWarner Losh   /* d <= k */
4900c16b537SWarner Losh   if (parameters.d > parameters.k) {
4910c16b537SWarner Losh     return 0;
4920c16b537SWarner Losh   }
493*0f743729SConrad Meyer   /* 0 < splitPoint <= 1 */
494*0f743729SConrad Meyer   if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
495*0f743729SConrad Meyer     return 0;
496*0f743729SConrad Meyer   }
4970c16b537SWarner Losh   return 1;
4980c16b537SWarner Losh }
4990c16b537SWarner Losh 
5000c16b537SWarner Losh /**
5010c16b537SWarner Losh  * Clean up a context initialized with `COVER_ctx_init()`.
5020c16b537SWarner Losh  */
5030c16b537SWarner Losh static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
5040c16b537SWarner Losh   if (!ctx) {
5050c16b537SWarner Losh     return;
5060c16b537SWarner Losh   }
5070c16b537SWarner Losh   if (ctx->suffix) {
5080c16b537SWarner Losh     free(ctx->suffix);
5090c16b537SWarner Losh     ctx->suffix = NULL;
5100c16b537SWarner Losh   }
5110c16b537SWarner Losh   if (ctx->freqs) {
5120c16b537SWarner Losh     free(ctx->freqs);
5130c16b537SWarner Losh     ctx->freqs = NULL;
5140c16b537SWarner Losh   }
5150c16b537SWarner Losh   if (ctx->dmerAt) {
5160c16b537SWarner Losh     free(ctx->dmerAt);
5170c16b537SWarner Losh     ctx->dmerAt = NULL;
5180c16b537SWarner Losh   }
5190c16b537SWarner Losh   if (ctx->offsets) {
5200c16b537SWarner Losh     free(ctx->offsets);
5210c16b537SWarner Losh     ctx->offsets = NULL;
5220c16b537SWarner Losh   }
5230c16b537SWarner Losh }
5240c16b537SWarner Losh 
5250c16b537SWarner Losh /**
5260c16b537SWarner Losh  * Prepare a context for dictionary building.
5270c16b537SWarner Losh  * The context is only dependent on the parameter `d` and can used multiple
5280c16b537SWarner Losh  * times.
5290c16b537SWarner Losh  * Returns 1 on success or zero on error.
5300c16b537SWarner Losh  * The context must be destroyed with `COVER_ctx_destroy()`.
5310c16b537SWarner Losh  */
5320c16b537SWarner Losh static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
5330c16b537SWarner Losh                           const size_t *samplesSizes, unsigned nbSamples,
534*0f743729SConrad Meyer                           unsigned d, double splitPoint) {
5350c16b537SWarner Losh   const BYTE *const samples = (const BYTE *)samplesBuffer;
5360c16b537SWarner Losh   const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
537*0f743729SConrad Meyer   /* Split samples into testing and training sets */
538*0f743729SConrad Meyer   const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
539*0f743729SConrad Meyer   const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
540*0f743729SConrad Meyer   const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
541*0f743729SConrad Meyer   const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
5420c16b537SWarner Losh   /* Checks */
5430c16b537SWarner Losh   if (totalSamplesSize < MAX(d, sizeof(U64)) ||
5440c16b537SWarner Losh       totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
54519fcbaf1SConrad Meyer     DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
54619fcbaf1SConrad Meyer                  (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
5470c16b537SWarner Losh     return 0;
5480c16b537SWarner Losh   }
549*0f743729SConrad Meyer   /* Check if there are at least 5 training samples */
550*0f743729SConrad Meyer   if (nbTrainSamples < 5) {
551*0f743729SConrad Meyer     DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
552*0f743729SConrad Meyer     return 0;
553*0f743729SConrad Meyer   }
554*0f743729SConrad Meyer   /* Check if there's testing sample */
555*0f743729SConrad Meyer   if (nbTestSamples < 1) {
556*0f743729SConrad Meyer     DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
557*0f743729SConrad Meyer     return 0;
558*0f743729SConrad Meyer   }
5590c16b537SWarner Losh   /* Zero the context */
5600c16b537SWarner Losh   memset(ctx, 0, sizeof(*ctx));
561*0f743729SConrad Meyer   DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
562*0f743729SConrad Meyer                (U32)trainingSamplesSize);
563*0f743729SConrad Meyer   DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
564*0f743729SConrad Meyer                (U32)testSamplesSize);
5650c16b537SWarner Losh   ctx->samples = samples;
5660c16b537SWarner Losh   ctx->samplesSizes = samplesSizes;
5670c16b537SWarner Losh   ctx->nbSamples = nbSamples;
568*0f743729SConrad Meyer   ctx->nbTrainSamples = nbTrainSamples;
569*0f743729SConrad Meyer   ctx->nbTestSamples = nbTestSamples;
5700c16b537SWarner Losh   /* Partial suffix array */
571*0f743729SConrad Meyer   ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
5720c16b537SWarner Losh   ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
5730c16b537SWarner Losh   /* Maps index to the dmerID */
5740c16b537SWarner Losh   ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
5750c16b537SWarner Losh   /* The offsets of each file */
5760c16b537SWarner Losh   ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
5770c16b537SWarner Losh   if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
5780c16b537SWarner Losh     DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
5790c16b537SWarner Losh     COVER_ctx_destroy(ctx);
5800c16b537SWarner Losh     return 0;
5810c16b537SWarner Losh   }
5820c16b537SWarner Losh   ctx->freqs = NULL;
5830c16b537SWarner Losh   ctx->d = d;
5840c16b537SWarner Losh 
585*0f743729SConrad Meyer   /* Fill offsets from the samplesSizes */
5860c16b537SWarner Losh   {
5870c16b537SWarner Losh     U32 i;
5880c16b537SWarner Losh     ctx->offsets[0] = 0;
5890c16b537SWarner Losh     for (i = 1; i <= nbSamples; ++i) {
5900c16b537SWarner Losh       ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
5910c16b537SWarner Losh     }
5920c16b537SWarner Losh   }
5930c16b537SWarner Losh   DISPLAYLEVEL(2, "Constructing partial suffix array\n");
5940c16b537SWarner Losh   {
5950c16b537SWarner Losh     /* suffix is a partial suffix array.
5960c16b537SWarner Losh      * It only sorts suffixes by their first parameters.d bytes.
5970c16b537SWarner Losh      * The sort is stable, so each dmer group is sorted by position in input.
5980c16b537SWarner Losh      */
5990c16b537SWarner Losh     U32 i;
6000c16b537SWarner Losh     for (i = 0; i < ctx->suffixSize; ++i) {
6010c16b537SWarner Losh       ctx->suffix[i] = i;
6020c16b537SWarner Losh     }
603*0f743729SConrad Meyer     /* qsort doesn't take an opaque pointer, so pass as a global.
604*0f743729SConrad Meyer      * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605*0f743729SConrad Meyer      */
6060c16b537SWarner Losh     g_ctx = ctx;
607*0f743729SConrad Meyer #if defined(__OpenBSD__)
608*0f743729SConrad Meyer     mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609*0f743729SConrad Meyer           (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
610*0f743729SConrad Meyer #else
6110c16b537SWarner Losh     qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
6120c16b537SWarner Losh           (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
613*0f743729SConrad Meyer #endif
6140c16b537SWarner Losh   }
6150c16b537SWarner Losh   DISPLAYLEVEL(2, "Computing frequencies\n");
6160c16b537SWarner Losh   /* For each dmer group (group of positions with the same first d bytes):
6170c16b537SWarner Losh    * 1. For each position we set dmerAt[position] = dmerID.  The dmerID is
6180c16b537SWarner Losh    *    (groupBeginPtr - suffix).  This allows us to go from position to
6190c16b537SWarner Losh    *    dmerID so we can look up values in freq.
6200c16b537SWarner Losh    * 2. We calculate how many samples the dmer occurs in and save it in
6210c16b537SWarner Losh    *    freqs[dmerId].
6220c16b537SWarner Losh    */
6230c16b537SWarner Losh   COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
6240c16b537SWarner Losh                 (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
6250c16b537SWarner Losh   ctx->freqs = ctx->suffix;
6260c16b537SWarner Losh   ctx->suffix = NULL;
6270c16b537SWarner Losh   return 1;
6280c16b537SWarner Losh }
6290c16b537SWarner Losh 
6300c16b537SWarner Losh /**
6310c16b537SWarner Losh  * Given the prepared context build the dictionary.
6320c16b537SWarner Losh  */
6330c16b537SWarner Losh static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
6340c16b537SWarner Losh                                     COVER_map_t *activeDmers, void *dictBuffer,
6350c16b537SWarner Losh                                     size_t dictBufferCapacity,
6360c16b537SWarner Losh                                     ZDICT_cover_params_t parameters) {
6370c16b537SWarner Losh   BYTE *const dict = (BYTE *)dictBuffer;
6380c16b537SWarner Losh   size_t tail = dictBufferCapacity;
6390c16b537SWarner Losh   /* Divide the data up into epochs of equal size.
6400c16b537SWarner Losh    * We will select at least one segment from each epoch.
6410c16b537SWarner Losh    */
642*0f743729SConrad Meyer   const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
6430c16b537SWarner Losh   const U32 epochSize = (U32)(ctx->suffixSize / epochs);
6440c16b537SWarner Losh   size_t epoch;
6450c16b537SWarner Losh   DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
6460c16b537SWarner Losh                epochSize);
6470c16b537SWarner Losh   /* Loop through the epochs until there are no more segments or the dictionary
6480c16b537SWarner Losh    * is full.
6490c16b537SWarner Losh    */
6500c16b537SWarner Losh   for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
6510c16b537SWarner Losh     const U32 epochBegin = (U32)(epoch * epochSize);
6520c16b537SWarner Losh     const U32 epochEnd = epochBegin + epochSize;
6530c16b537SWarner Losh     size_t segmentSize;
6540c16b537SWarner Losh     /* Select a segment */
6550c16b537SWarner Losh     COVER_segment_t segment = COVER_selectSegment(
6560c16b537SWarner Losh         ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
6570c16b537SWarner Losh     /* If the segment covers no dmers, then we are out of content */
6580c16b537SWarner Losh     if (segment.score == 0) {
6590c16b537SWarner Losh       break;
6600c16b537SWarner Losh     }
6610c16b537SWarner Losh     /* Trim the segment if necessary and if it is too small then we are done */
6620c16b537SWarner Losh     segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
6630c16b537SWarner Losh     if (segmentSize < parameters.d) {
6640c16b537SWarner Losh       break;
6650c16b537SWarner Losh     }
6660c16b537SWarner Losh     /* We fill the dictionary from the back to allow the best segments to be
6670c16b537SWarner Losh      * referenced with the smallest offsets.
6680c16b537SWarner Losh      */
6690c16b537SWarner Losh     tail -= segmentSize;
6700c16b537SWarner Losh     memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
6710c16b537SWarner Losh     DISPLAYUPDATE(
6720c16b537SWarner Losh         2, "\r%u%%       ",
6730c16b537SWarner Losh         (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
6740c16b537SWarner Losh   }
6750c16b537SWarner Losh   DISPLAYLEVEL(2, "\r%79s\r", "");
6760c16b537SWarner Losh   return tail;
6770c16b537SWarner Losh }
6780c16b537SWarner Losh 
6790c16b537SWarner Losh ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
68019fcbaf1SConrad Meyer     void *dictBuffer, size_t dictBufferCapacity,
68119fcbaf1SConrad Meyer     const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
68219fcbaf1SConrad Meyer     ZDICT_cover_params_t parameters)
68319fcbaf1SConrad Meyer {
6840c16b537SWarner Losh   BYTE* const dict = (BYTE*)dictBuffer;
6850c16b537SWarner Losh   COVER_ctx_t ctx;
6860c16b537SWarner Losh   COVER_map_t activeDmers;
687*0f743729SConrad Meyer   parameters.splitPoint = 1.0;
68819fcbaf1SConrad Meyer   /* Initialize global data */
68919fcbaf1SConrad Meyer   g_displayLevel = parameters.zParams.notificationLevel;
6900c16b537SWarner Losh   /* Checks */
6910c16b537SWarner Losh   if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
6920c16b537SWarner Losh     DISPLAYLEVEL(1, "Cover parameters incorrect\n");
6930c16b537SWarner Losh     return ERROR(GENERIC);
6940c16b537SWarner Losh   }
6950c16b537SWarner Losh   if (nbSamples == 0) {
6960c16b537SWarner Losh     DISPLAYLEVEL(1, "Cover must have at least one input file\n");
6970c16b537SWarner Losh     return ERROR(GENERIC);
6980c16b537SWarner Losh   }
6990c16b537SWarner Losh   if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
7000c16b537SWarner Losh     DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
7010c16b537SWarner Losh                  ZDICT_DICTSIZE_MIN);
7020c16b537SWarner Losh     return ERROR(dstSize_tooSmall);
7030c16b537SWarner Losh   }
7040c16b537SWarner Losh   /* Initialize context and activeDmers */
7050c16b537SWarner Losh   if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
706*0f743729SConrad Meyer                       parameters.d, parameters.splitPoint)) {
7070c16b537SWarner Losh     return ERROR(GENERIC);
7080c16b537SWarner Losh   }
7090c16b537SWarner Losh   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
7100c16b537SWarner Losh     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
7110c16b537SWarner Losh     COVER_ctx_destroy(&ctx);
7120c16b537SWarner Losh     return ERROR(GENERIC);
7130c16b537SWarner Losh   }
7140c16b537SWarner Losh 
7150c16b537SWarner Losh   DISPLAYLEVEL(2, "Building dictionary\n");
7160c16b537SWarner Losh   {
7170c16b537SWarner Losh     const size_t tail =
7180c16b537SWarner Losh         COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
7190c16b537SWarner Losh                               dictBufferCapacity, parameters);
7200c16b537SWarner Losh     const size_t dictionarySize = ZDICT_finalizeDictionary(
7210c16b537SWarner Losh         dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
7220c16b537SWarner Losh         samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
7230c16b537SWarner Losh     if (!ZSTD_isError(dictionarySize)) {
7240c16b537SWarner Losh       DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
7250c16b537SWarner Losh                    (U32)dictionarySize);
7260c16b537SWarner Losh     }
7270c16b537SWarner Losh     COVER_ctx_destroy(&ctx);
7280c16b537SWarner Losh     COVER_map_destroy(&activeDmers);
7290c16b537SWarner Losh     return dictionarySize;
7300c16b537SWarner Losh   }
7310c16b537SWarner Losh }
7320c16b537SWarner Losh 
733*0f743729SConrad Meyer 
734*0f743729SConrad Meyer 
735*0f743729SConrad Meyer size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
736*0f743729SConrad Meyer                                     const size_t *samplesSizes, const BYTE *samples,
737*0f743729SConrad Meyer                                     size_t *offsets,
738*0f743729SConrad Meyer                                     size_t nbTrainSamples, size_t nbSamples,
739*0f743729SConrad Meyer                                     BYTE *const dict, size_t dictBufferCapacity) {
740*0f743729SConrad Meyer   size_t totalCompressedSize = ERROR(GENERIC);
741*0f743729SConrad Meyer   /* Pointers */
742*0f743729SConrad Meyer   ZSTD_CCtx *cctx;
743*0f743729SConrad Meyer   ZSTD_CDict *cdict;
744*0f743729SConrad Meyer   void *dst;
745*0f743729SConrad Meyer   /* Local variables */
746*0f743729SConrad Meyer   size_t dstCapacity;
747*0f743729SConrad Meyer   size_t i;
748*0f743729SConrad Meyer   /* Allocate dst with enough space to compress the maximum sized sample */
749*0f743729SConrad Meyer   {
750*0f743729SConrad Meyer     size_t maxSampleSize = 0;
751*0f743729SConrad Meyer     i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
752*0f743729SConrad Meyer     for (; i < nbSamples; ++i) {
753*0f743729SConrad Meyer       maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
754*0f743729SConrad Meyer     }
755*0f743729SConrad Meyer     dstCapacity = ZSTD_compressBound(maxSampleSize);
756*0f743729SConrad Meyer     dst = malloc(dstCapacity);
757*0f743729SConrad Meyer   }
758*0f743729SConrad Meyer   /* Create the cctx and cdict */
759*0f743729SConrad Meyer   cctx = ZSTD_createCCtx();
760*0f743729SConrad Meyer   cdict = ZSTD_createCDict(dict, dictBufferCapacity,
761*0f743729SConrad Meyer                            parameters.zParams.compressionLevel);
762*0f743729SConrad Meyer   if (!dst || !cctx || !cdict) {
763*0f743729SConrad Meyer     goto _compressCleanup;
764*0f743729SConrad Meyer   }
765*0f743729SConrad Meyer   /* Compress each sample and sum their sizes (or error) */
766*0f743729SConrad Meyer   totalCompressedSize = dictBufferCapacity;
767*0f743729SConrad Meyer   i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
768*0f743729SConrad Meyer   for (; i < nbSamples; ++i) {
769*0f743729SConrad Meyer     const size_t size = ZSTD_compress_usingCDict(
770*0f743729SConrad Meyer         cctx, dst, dstCapacity, samples + offsets[i],
771*0f743729SConrad Meyer         samplesSizes[i], cdict);
772*0f743729SConrad Meyer     if (ZSTD_isError(size)) {
773*0f743729SConrad Meyer       totalCompressedSize = ERROR(GENERIC);
774*0f743729SConrad Meyer       goto _compressCleanup;
775*0f743729SConrad Meyer     }
776*0f743729SConrad Meyer     totalCompressedSize += size;
777*0f743729SConrad Meyer   }
778*0f743729SConrad Meyer _compressCleanup:
779*0f743729SConrad Meyer   ZSTD_freeCCtx(cctx);
780*0f743729SConrad Meyer   ZSTD_freeCDict(cdict);
781*0f743729SConrad Meyer   if (dst) {
782*0f743729SConrad Meyer     free(dst);
783*0f743729SConrad Meyer   }
784*0f743729SConrad Meyer   return totalCompressedSize;
785*0f743729SConrad Meyer }
786*0f743729SConrad Meyer 
7870c16b537SWarner Losh 
7880c16b537SWarner Losh /**
7890c16b537SWarner Losh  * Initialize the `COVER_best_t`.
7900c16b537SWarner Losh  */
791*0f743729SConrad Meyer void COVER_best_init(COVER_best_t *best) {
7920c16b537SWarner Losh   if (best==NULL) return; /* compatible with init on NULL */
7930c16b537SWarner Losh   (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
7940c16b537SWarner Losh   (void)ZSTD_pthread_cond_init(&best->cond, NULL);
7950c16b537SWarner Losh   best->liveJobs = 0;
7960c16b537SWarner Losh   best->dict = NULL;
7970c16b537SWarner Losh   best->dictSize = 0;
7980c16b537SWarner Losh   best->compressedSize = (size_t)-1;
7990c16b537SWarner Losh   memset(&best->parameters, 0, sizeof(best->parameters));
8000c16b537SWarner Losh }
8010c16b537SWarner Losh 
8020c16b537SWarner Losh /**
8030c16b537SWarner Losh  * Wait until liveJobs == 0.
8040c16b537SWarner Losh  */
805*0f743729SConrad Meyer void COVER_best_wait(COVER_best_t *best) {
8060c16b537SWarner Losh   if (!best) {
8070c16b537SWarner Losh     return;
8080c16b537SWarner Losh   }
8090c16b537SWarner Losh   ZSTD_pthread_mutex_lock(&best->mutex);
8100c16b537SWarner Losh   while (best->liveJobs != 0) {
8110c16b537SWarner Losh     ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
8120c16b537SWarner Losh   }
8130c16b537SWarner Losh   ZSTD_pthread_mutex_unlock(&best->mutex);
8140c16b537SWarner Losh }
8150c16b537SWarner Losh 
8160c16b537SWarner Losh /**
8170c16b537SWarner Losh  * Call COVER_best_wait() and then destroy the COVER_best_t.
8180c16b537SWarner Losh  */
819*0f743729SConrad Meyer void COVER_best_destroy(COVER_best_t *best) {
8200c16b537SWarner Losh   if (!best) {
8210c16b537SWarner Losh     return;
8220c16b537SWarner Losh   }
8230c16b537SWarner Losh   COVER_best_wait(best);
8240c16b537SWarner Losh   if (best->dict) {
8250c16b537SWarner Losh     free(best->dict);
8260c16b537SWarner Losh   }
8270c16b537SWarner Losh   ZSTD_pthread_mutex_destroy(&best->mutex);
8280c16b537SWarner Losh   ZSTD_pthread_cond_destroy(&best->cond);
8290c16b537SWarner Losh }
8300c16b537SWarner Losh 
8310c16b537SWarner Losh /**
8320c16b537SWarner Losh  * Called when a thread is about to be launched.
8330c16b537SWarner Losh  * Increments liveJobs.
8340c16b537SWarner Losh  */
835*0f743729SConrad Meyer void COVER_best_start(COVER_best_t *best) {
8360c16b537SWarner Losh   if (!best) {
8370c16b537SWarner Losh     return;
8380c16b537SWarner Losh   }
8390c16b537SWarner Losh   ZSTD_pthread_mutex_lock(&best->mutex);
8400c16b537SWarner Losh   ++best->liveJobs;
8410c16b537SWarner Losh   ZSTD_pthread_mutex_unlock(&best->mutex);
8420c16b537SWarner Losh }
8430c16b537SWarner Losh 
8440c16b537SWarner Losh /**
8450c16b537SWarner Losh  * Called when a thread finishes executing, both on error or success.
8460c16b537SWarner Losh  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
8470c16b537SWarner Losh  * If this dictionary is the best so far save it and its parameters.
8480c16b537SWarner Losh  */
849*0f743729SConrad Meyer void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
8500c16b537SWarner Losh                               ZDICT_cover_params_t parameters, void *dict,
8510c16b537SWarner Losh                               size_t dictSize) {
8520c16b537SWarner Losh   if (!best) {
8530c16b537SWarner Losh     return;
8540c16b537SWarner Losh   }
8550c16b537SWarner Losh   {
8560c16b537SWarner Losh     size_t liveJobs;
8570c16b537SWarner Losh     ZSTD_pthread_mutex_lock(&best->mutex);
8580c16b537SWarner Losh     --best->liveJobs;
8590c16b537SWarner Losh     liveJobs = best->liveJobs;
8600c16b537SWarner Losh     /* If the new dictionary is better */
8610c16b537SWarner Losh     if (compressedSize < best->compressedSize) {
8620c16b537SWarner Losh       /* Allocate space if necessary */
8630c16b537SWarner Losh       if (!best->dict || best->dictSize < dictSize) {
8640c16b537SWarner Losh         if (best->dict) {
8650c16b537SWarner Losh           free(best->dict);
8660c16b537SWarner Losh         }
8670c16b537SWarner Losh         best->dict = malloc(dictSize);
8680c16b537SWarner Losh         if (!best->dict) {
8690c16b537SWarner Losh           best->compressedSize = ERROR(GENERIC);
8700c16b537SWarner Losh           best->dictSize = 0;
8710c16b537SWarner Losh           return;
8720c16b537SWarner Losh         }
8730c16b537SWarner Losh       }
8740c16b537SWarner Losh       /* Save the dictionary, parameters, and size */
8750c16b537SWarner Losh       memcpy(best->dict, dict, dictSize);
8760c16b537SWarner Losh       best->dictSize = dictSize;
8770c16b537SWarner Losh       best->parameters = parameters;
8780c16b537SWarner Losh       best->compressedSize = compressedSize;
8790c16b537SWarner Losh     }
8800c16b537SWarner Losh     if (liveJobs == 0) {
8810c16b537SWarner Losh       ZSTD_pthread_cond_broadcast(&best->cond);
8820c16b537SWarner Losh     }
883*0f743729SConrad Meyer     ZSTD_pthread_mutex_unlock(&best->mutex);
8840c16b537SWarner Losh   }
8850c16b537SWarner Losh }
8860c16b537SWarner Losh 
8870c16b537SWarner Losh /**
8880c16b537SWarner Losh  * Parameters for COVER_tryParameters().
8890c16b537SWarner Losh  */
8900c16b537SWarner Losh typedef struct COVER_tryParameters_data_s {
8910c16b537SWarner Losh   const COVER_ctx_t *ctx;
8920c16b537SWarner Losh   COVER_best_t *best;
8930c16b537SWarner Losh   size_t dictBufferCapacity;
8940c16b537SWarner Losh   ZDICT_cover_params_t parameters;
8950c16b537SWarner Losh } COVER_tryParameters_data_t;
8960c16b537SWarner Losh 
8970c16b537SWarner Losh /**
898*0f743729SConrad Meyer  * Tries a set of parameters and updates the COVER_best_t with the results.
8990c16b537SWarner Losh  * This function is thread safe if zstd is compiled with multithreaded support.
9000c16b537SWarner Losh  * It takes its parameters as an *OWNING* opaque pointer to support threading.
9010c16b537SWarner Losh  */
9020c16b537SWarner Losh static void COVER_tryParameters(void *opaque) {
9030c16b537SWarner Losh   /* Save parameters as local variables */
9040c16b537SWarner Losh   COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
9050c16b537SWarner Losh   const COVER_ctx_t *const ctx = data->ctx;
9060c16b537SWarner Losh   const ZDICT_cover_params_t parameters = data->parameters;
9070c16b537SWarner Losh   size_t dictBufferCapacity = data->dictBufferCapacity;
9080c16b537SWarner Losh   size_t totalCompressedSize = ERROR(GENERIC);
9090c16b537SWarner Losh   /* Allocate space for hash table, dict, and freqs */
9100c16b537SWarner Losh   COVER_map_t activeDmers;
9110c16b537SWarner Losh   BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
9120c16b537SWarner Losh   U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
9130c16b537SWarner Losh   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
9140c16b537SWarner Losh     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
9150c16b537SWarner Losh     goto _cleanup;
9160c16b537SWarner Losh   }
9170c16b537SWarner Losh   if (!dict || !freqs) {
9180c16b537SWarner Losh     DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
9190c16b537SWarner Losh     goto _cleanup;
9200c16b537SWarner Losh   }
9210c16b537SWarner Losh   /* Copy the frequencies because we need to modify them */
9220c16b537SWarner Losh   memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
9230c16b537SWarner Losh   /* Build the dictionary */
9240c16b537SWarner Losh   {
9250c16b537SWarner Losh     const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
9260c16b537SWarner Losh                                               dictBufferCapacity, parameters);
9270c16b537SWarner Losh     dictBufferCapacity = ZDICT_finalizeDictionary(
9280c16b537SWarner Losh         dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
929*0f743729SConrad Meyer         ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
9300c16b537SWarner Losh         parameters.zParams);
9310c16b537SWarner Losh     if (ZDICT_isError(dictBufferCapacity)) {
9320c16b537SWarner Losh       DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
9330c16b537SWarner Losh       goto _cleanup;
9340c16b537SWarner Losh     }
9350c16b537SWarner Losh   }
9360c16b537SWarner Losh   /* Check total compressed size */
937*0f743729SConrad Meyer   totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
938*0f743729SConrad Meyer                                                        ctx->samples, ctx->offsets,
939*0f743729SConrad Meyer                                                        ctx->nbTrainSamples, ctx->nbSamples,
940*0f743729SConrad Meyer                                                        dict, dictBufferCapacity);
9410c16b537SWarner Losh 
9420c16b537SWarner Losh _cleanup:
9430c16b537SWarner Losh   COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
9440c16b537SWarner Losh                     dictBufferCapacity);
9450c16b537SWarner Losh   free(data);
9460c16b537SWarner Losh   COVER_map_destroy(&activeDmers);
9470c16b537SWarner Losh   if (dict) {
9480c16b537SWarner Losh     free(dict);
9490c16b537SWarner Losh   }
9500c16b537SWarner Losh   if (freqs) {
9510c16b537SWarner Losh     free(freqs);
9520c16b537SWarner Losh   }
9530c16b537SWarner Losh }
9540c16b537SWarner Losh 
9550c16b537SWarner Losh ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
9560c16b537SWarner Losh     void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
9570c16b537SWarner Losh     const size_t *samplesSizes, unsigned nbSamples,
9580c16b537SWarner Losh     ZDICT_cover_params_t *parameters) {
9590c16b537SWarner Losh   /* constants */
9600c16b537SWarner Losh   const unsigned nbThreads = parameters->nbThreads;
961*0f743729SConrad Meyer   const double splitPoint =
962*0f743729SConrad Meyer       parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
9630c16b537SWarner Losh   const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
9640c16b537SWarner Losh   const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
9650c16b537SWarner Losh   const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
9660c16b537SWarner Losh   const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
9670c16b537SWarner Losh   const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
9680c16b537SWarner Losh   const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
9690c16b537SWarner Losh   const unsigned kIterations =
9700c16b537SWarner Losh       (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
9710c16b537SWarner Losh   /* Local variables */
9720c16b537SWarner Losh   const int displayLevel = parameters->zParams.notificationLevel;
9730c16b537SWarner Losh   unsigned iteration = 1;
9740c16b537SWarner Losh   unsigned d;
9750c16b537SWarner Losh   unsigned k;
9760c16b537SWarner Losh   COVER_best_t best;
9770c16b537SWarner Losh   POOL_ctx *pool = NULL;
97819fcbaf1SConrad Meyer 
9790c16b537SWarner Losh   /* Checks */
980*0f743729SConrad Meyer   if (splitPoint <= 0 || splitPoint > 1) {
981*0f743729SConrad Meyer     LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
982*0f743729SConrad Meyer     return ERROR(GENERIC);
983*0f743729SConrad Meyer   }
9840c16b537SWarner Losh   if (kMinK < kMaxD || kMaxK < kMinK) {
9850c16b537SWarner Losh     LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
9860c16b537SWarner Losh     return ERROR(GENERIC);
9870c16b537SWarner Losh   }
9880c16b537SWarner Losh   if (nbSamples == 0) {
9890c16b537SWarner Losh     DISPLAYLEVEL(1, "Cover must have at least one input file\n");
9900c16b537SWarner Losh     return ERROR(GENERIC);
9910c16b537SWarner Losh   }
9920c16b537SWarner Losh   if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
9930c16b537SWarner Losh     DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
9940c16b537SWarner Losh                  ZDICT_DICTSIZE_MIN);
9950c16b537SWarner Losh     return ERROR(dstSize_tooSmall);
9960c16b537SWarner Losh   }
9970c16b537SWarner Losh   if (nbThreads > 1) {
9980c16b537SWarner Losh     pool = POOL_create(nbThreads, 1);
9990c16b537SWarner Losh     if (!pool) {
10000c16b537SWarner Losh       return ERROR(memory_allocation);
10010c16b537SWarner Losh     }
10020c16b537SWarner Losh   }
10030c16b537SWarner Losh   /* Initialization */
10040c16b537SWarner Losh   COVER_best_init(&best);
10050c16b537SWarner Losh   /* Turn down global display level to clean up display at level 2 and below */
10060c16b537SWarner Losh   g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
10070c16b537SWarner Losh   /* Loop through d first because each new value needs a new context */
10080c16b537SWarner Losh   LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
10090c16b537SWarner Losh                     kIterations);
10100c16b537SWarner Losh   for (d = kMinD; d <= kMaxD; d += 2) {
10110c16b537SWarner Losh     /* Initialize the context for this value of d */
10120c16b537SWarner Losh     COVER_ctx_t ctx;
10130c16b537SWarner Losh     LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
1014*0f743729SConrad Meyer     if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
10150c16b537SWarner Losh       LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
10160c16b537SWarner Losh       COVER_best_destroy(&best);
10170c16b537SWarner Losh       POOL_free(pool);
10180c16b537SWarner Losh       return ERROR(GENERIC);
10190c16b537SWarner Losh     }
10200c16b537SWarner Losh     /* Loop through k reusing the same context */
10210c16b537SWarner Losh     for (k = kMinK; k <= kMaxK; k += kStepSize) {
10220c16b537SWarner Losh       /* Prepare the arguments */
10230c16b537SWarner Losh       COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
10240c16b537SWarner Losh           sizeof(COVER_tryParameters_data_t));
10250c16b537SWarner Losh       LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
10260c16b537SWarner Losh       if (!data) {
10270c16b537SWarner Losh         LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
10280c16b537SWarner Losh         COVER_best_destroy(&best);
10290c16b537SWarner Losh         COVER_ctx_destroy(&ctx);
10300c16b537SWarner Losh         POOL_free(pool);
10310c16b537SWarner Losh         return ERROR(GENERIC);
10320c16b537SWarner Losh       }
10330c16b537SWarner Losh       data->ctx = &ctx;
10340c16b537SWarner Losh       data->best = &best;
10350c16b537SWarner Losh       data->dictBufferCapacity = dictBufferCapacity;
10360c16b537SWarner Losh       data->parameters = *parameters;
10370c16b537SWarner Losh       data->parameters.k = k;
10380c16b537SWarner Losh       data->parameters.d = d;
1039*0f743729SConrad Meyer       data->parameters.splitPoint = splitPoint;
10400c16b537SWarner Losh       data->parameters.steps = kSteps;
10410c16b537SWarner Losh       data->parameters.zParams.notificationLevel = g_displayLevel;
10420c16b537SWarner Losh       /* Check the parameters */
10430c16b537SWarner Losh       if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
10440c16b537SWarner Losh         DISPLAYLEVEL(1, "Cover parameters incorrect\n");
10450c16b537SWarner Losh         free(data);
10460c16b537SWarner Losh         continue;
10470c16b537SWarner Losh       }
10480c16b537SWarner Losh       /* Call the function and pass ownership of data to it */
10490c16b537SWarner Losh       COVER_best_start(&best);
10500c16b537SWarner Losh       if (pool) {
10510c16b537SWarner Losh         POOL_add(pool, &COVER_tryParameters, data);
10520c16b537SWarner Losh       } else {
10530c16b537SWarner Losh         COVER_tryParameters(data);
10540c16b537SWarner Losh       }
10550c16b537SWarner Losh       /* Print status */
10560c16b537SWarner Losh       LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
10570c16b537SWarner Losh                          (U32)((iteration * 100) / kIterations));
10580c16b537SWarner Losh       ++iteration;
10590c16b537SWarner Losh     }
10600c16b537SWarner Losh     COVER_best_wait(&best);
10610c16b537SWarner Losh     COVER_ctx_destroy(&ctx);
10620c16b537SWarner Losh   }
10630c16b537SWarner Losh   LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
10640c16b537SWarner Losh   /* Fill the output buffer and parameters with output of the best parameters */
10650c16b537SWarner Losh   {
10660c16b537SWarner Losh     const size_t dictSize = best.dictSize;
10670c16b537SWarner Losh     if (ZSTD_isError(best.compressedSize)) {
10680c16b537SWarner Losh       const size_t compressedSize = best.compressedSize;
10690c16b537SWarner Losh       COVER_best_destroy(&best);
10700c16b537SWarner Losh       POOL_free(pool);
10710c16b537SWarner Losh       return compressedSize;
10720c16b537SWarner Losh     }
10730c16b537SWarner Losh     *parameters = best.parameters;
10740c16b537SWarner Losh     memcpy(dictBuffer, best.dict, dictSize);
10750c16b537SWarner Losh     COVER_best_destroy(&best);
10760c16b537SWarner Losh     POOL_free(pool);
10770c16b537SWarner Losh     return dictSize;
10780c16b537SWarner Losh   }
10790c16b537SWarner Losh }
1080