Home | History | Annotate | Line # | Download | only in regression
      1  1.1  christos /*
      2  1.1  christos  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  1.1  christos  * All rights reserved.
      4  1.1  christos  *
      5  1.1  christos  * This source code is licensed under both the BSD-style license (found in the
      6  1.1  christos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  1.1  christos  * in the COPYING file in the root directory of this source tree).
      8  1.1  christos  * You may select, at your option, one of the above-listed licenses.
      9  1.1  christos  */
     10  1.1  christos 
     11  1.1  christos #include "data.h"
     12  1.1  christos 
     13  1.1  christos #include <assert.h>
     14  1.1  christos #include <errno.h>
     15  1.1  christos #include <stdio.h>
     16  1.1  christos #include <string.h>
     17  1.1  christos #include <stdlib.h>   /* free() */
     18  1.1  christos 
     19  1.1  christos #include <sys/stat.h>
     20  1.1  christos 
     21  1.1  christos #include <curl/curl.h>
     22  1.1  christos 
     23  1.1  christos #include "mem.h"
     24  1.1  christos #include "util.h"
     25  1.1  christos #define XXH_STATIC_LINKING_ONLY
     26  1.1  christos #include "xxhash.h"
     27  1.1  christos 
     28  1.1  christos /**
     29  1.1  christos  * Data objects
     30  1.1  christos  */
     31  1.1  christos 
/** Prefixes the string literal `asset` with the regression-data release URL. */
#define REGRESSION_RELEASE(asset) \
    "https://github.com/facebook/zstd/releases/download/regression-data/" asset
     34  1.1  christos 
     35  1.1  christos data_t silesia = {
     36  1.1  christos     .name = "silesia",
     37  1.1  christos     .type = data_type_dir,
     38  1.1  christos     .data =
     39  1.1  christos         {
     40  1.1  christos             .url = REGRESSION_RELEASE("silesia.tar.zst"),
     41  1.1  christos             .xxhash64 = 0x48a199f92f93e977LL,
     42  1.1  christos         },
     43  1.1  christos };
     44  1.1  christos 
     45  1.1  christos data_t silesia_tar = {
     46  1.1  christos     .name = "silesia.tar",
     47  1.1  christos     .type = data_type_file,
     48  1.1  christos     .data =
     49  1.1  christos         {
     50  1.1  christos             .url = REGRESSION_RELEASE("silesia.tar.zst"),
     51  1.1  christos             .xxhash64 = 0x48a199f92f93e977LL,
     52  1.1  christos         },
     53  1.1  christos };
     54  1.1  christos 
     55  1.1  christos data_t github = {
     56  1.1  christos     .name = "github",
     57  1.1  christos     .type = data_type_dir,
     58  1.1  christos     .data =
     59  1.1  christos         {
     60  1.1  christos             .url = REGRESSION_RELEASE("github.tar.zst"),
     61  1.1  christos             .xxhash64 = 0xa9b1b44b020df292LL,
     62  1.1  christos         },
     63  1.1  christos     .dict =
     64  1.1  christos         {
     65  1.1  christos             .url = REGRESSION_RELEASE("github.dict.zst"),
     66  1.1  christos             .xxhash64 = 0x1eddc6f737d3cb53LL,
     67  1.1  christos 
     68  1.1  christos         },
     69  1.1  christos };
     70  1.1  christos 
     71  1.1  christos data_t github_tar = {
     72  1.1  christos     .name = "github.tar",
     73  1.1  christos     .type = data_type_file,
     74  1.1  christos     .data =
     75  1.1  christos         {
     76  1.1  christos             .url = REGRESSION_RELEASE("github.tar.zst"),
     77  1.1  christos             .xxhash64 = 0xa9b1b44b020df292LL,
     78  1.1  christos         },
     79  1.1  christos     .dict =
     80  1.1  christos         {
     81  1.1  christos             .url = REGRESSION_RELEASE("github.dict.zst"),
     82  1.1  christos             .xxhash64 = 0x1eddc6f737d3cb53LL,
     83  1.1  christos 
     84  1.1  christos         },
     85  1.1  christos };
     86  1.1  christos 
     87  1.1  christos static data_t* g_data[] = {
     88  1.1  christos     &silesia,
     89  1.1  christos     &silesia_tar,
     90  1.1  christos     &github,
     91  1.1  christos     &github_tar,
     92  1.1  christos     NULL,
     93  1.1  christos };
     94  1.1  christos 
     95  1.1  christos data_t const* const* data = (data_t const* const*)g_data;
     96  1.1  christos 
     97  1.1  christos /**
     98  1.1  christos  * data helpers.
     99  1.1  christos  */
    100  1.1  christos 
    101  1.1  christos int data_has_dict(data_t const* data) {
    102  1.1  christos     return data->dict.url != NULL;
    103  1.1  christos }
    104  1.1  christos 
    105  1.1  christos /**
    106  1.1  christos  * data buffer helper functions (documented in header).
    107  1.1  christos  */
    108  1.1  christos 
    109  1.1  christos data_buffer_t data_buffer_create(size_t const capacity) {
    110  1.1  christos     data_buffer_t buffer = {};
    111  1.1  christos 
    112  1.1  christos     buffer.data = (uint8_t*)malloc(capacity);
    113  1.1  christos     if (buffer.data == NULL)
    114  1.1  christos         return buffer;
    115  1.1  christos     buffer.capacity = capacity;
    116  1.1  christos     return buffer;
    117  1.1  christos }
    118  1.1  christos 
    119  1.1  christos data_buffer_t data_buffer_read(char const* filename) {
    120  1.1  christos     data_buffer_t buffer = {};
    121  1.1  christos 
    122  1.1  christos     uint64_t const size = UTIL_getFileSize(filename);
    123  1.1  christos     if (size == UTIL_FILESIZE_UNKNOWN) {
    124  1.1  christos         fprintf(stderr, "unknown size for %s\n", filename);
    125  1.1  christos         return buffer;
    126  1.1  christos     }
    127  1.1  christos 
    128  1.1  christos     buffer.data = (uint8_t*)malloc(size);
    129  1.1  christos     if (buffer.data == NULL) {
    130  1.1  christos         fprintf(stderr, "malloc failed\n");
    131  1.1  christos         return buffer;
    132  1.1  christos     }
    133  1.1  christos     buffer.capacity = size;
    134  1.1  christos 
    135  1.1  christos     FILE* file = fopen(filename, "rb");
    136  1.1  christos     if (file == NULL) {
    137  1.1  christos         fprintf(stderr, "file null\n");
    138  1.1  christos         goto err;
    139  1.1  christos     }
    140  1.1  christos     buffer.size = fread(buffer.data, 1, buffer.capacity, file);
    141  1.1  christos     fclose(file);
    142  1.1  christos     if (buffer.size != buffer.capacity) {
    143  1.1  christos         fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity);
    144  1.1  christos         goto err;
    145  1.1  christos     }
    146  1.1  christos 
    147  1.1  christos     return buffer;
    148  1.1  christos err:
    149  1.1  christos     free(buffer.data);
    150  1.1  christos     memset(&buffer, 0, sizeof(buffer));
    151  1.1  christos     return buffer;
    152  1.1  christos }
    153  1.1  christos 
    154  1.1  christos data_buffer_t data_buffer_get_data(data_t const* data) {
    155  1.1  christos     data_buffer_t const kEmptyBuffer = {};
    156  1.1  christos 
    157  1.1  christos     if (data->type != data_type_file)
    158  1.1  christos         return kEmptyBuffer;
    159  1.1  christos 
    160  1.1  christos     return data_buffer_read(data->data.path);
    161  1.1  christos }
    162  1.1  christos 
    163  1.1  christos data_buffer_t data_buffer_get_dict(data_t const* data) {
    164  1.1  christos     data_buffer_t const kEmptyBuffer = {};
    165  1.1  christos 
    166  1.1  christos     if (!data_has_dict(data))
    167  1.1  christos         return kEmptyBuffer;
    168  1.1  christos 
    169  1.1  christos     return data_buffer_read(data->dict.path);
    170  1.1  christos }
    171  1.1  christos 
    172  1.1  christos int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
    173  1.1  christos     size_t const size =
    174  1.1  christos         buffer1.size < buffer2.size ? buffer1.size : buffer2.size;
    175  1.1  christos     int const cmp = memcmp(buffer1.data, buffer2.data, size);
    176  1.1  christos     if (cmp != 0)
    177  1.1  christos         return cmp;
    178  1.1  christos     if (buffer1.size < buffer2.size)
    179  1.1  christos         return -1;
    180  1.1  christos     if (buffer1.size == buffer2.size)
    181  1.1  christos         return 0;
    182  1.1  christos     assert(buffer1.size > buffer2.size);
    183  1.1  christos     return 1;
    184  1.1  christos }
    185  1.1  christos 
    186  1.1  christos void data_buffer_free(data_buffer_t buffer) {
    187  1.1  christos     free(buffer.data);
    188  1.1  christos }
    189  1.1  christos 
    190  1.1  christos /**
    191  1.1  christos  * data filenames helpers.
    192  1.1  christos  */
    193  1.1  christos 
    194  1.1  christos FileNamesTable* data_filenames_get(data_t const* data)
    195  1.1  christos {
    196  1.1  christos     char const* const path = data->data.path;
    197  1.1  christos     return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ );
    198  1.1  christos }
    199  1.1  christos 
    200  1.1  christos /**
    201  1.1  christos  * data buffers helpers.
    202  1.1  christos  */
    203  1.1  christos 
    204  1.1  christos data_buffers_t data_buffers_get(data_t const* data) {
    205  1.1  christos     data_buffers_t buffers = {.size = 0};
    206  1.1  christos     FileNamesTable* const filenames = data_filenames_get(data);
    207  1.1  christos     if (filenames == NULL) return buffers;
    208  1.1  christos     if (filenames->tableSize == 0) {
    209  1.1  christos         UTIL_freeFileNamesTable(filenames);
    210  1.1  christos         return buffers;
    211  1.1  christos     }
    212  1.1  christos 
    213  1.1  christos     data_buffer_t* buffersPtr =
    214  1.1  christos         (data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr));
    215  1.1  christos     if (buffersPtr == NULL) {
    216  1.1  christos         UTIL_freeFileNamesTable(filenames);
    217  1.1  christos         return buffers;
    218  1.1  christos     }
    219  1.1  christos     buffers.buffers = (data_buffer_t const*)buffersPtr;
    220  1.1  christos     buffers.size = filenames->tableSize;
    221  1.1  christos 
    222  1.1  christos     for (size_t i = 0; i < filenames->tableSize; ++i) {
    223  1.1  christos         buffersPtr[i] = data_buffer_read(filenames->fileNames[i]);
    224  1.1  christos         if (buffersPtr[i].data == NULL) {
    225  1.1  christos             data_buffers_t const kEmptyBuffer = {};
    226  1.1  christos             data_buffers_free(buffers);
    227  1.1  christos             UTIL_freeFileNamesTable(filenames);
    228  1.1  christos             return kEmptyBuffer;
    229  1.1  christos         }
    230  1.1  christos     }
    231  1.1  christos 
    232  1.1  christos     UTIL_freeFileNamesTable(filenames);
    233  1.1  christos     return buffers;
    234  1.1  christos }
    235  1.1  christos 
    236  1.1  christos /**
    237  1.1  christos  * Frees the data buffers.
    238  1.1  christos  */
    239  1.1  christos void data_buffers_free(data_buffers_t buffers) {
    240  1.1  christos     free((data_buffer_t*)buffers.buffers);
    241  1.1  christos }
    242  1.1  christos 
    243  1.1  christos /**
    244  1.1  christos  * Initialization and download functions.
    245  1.1  christos  */
    246  1.1  christos 
/* Cache directory saved by data_init(); NULL before that and after
 * data_finish(). Static storage starts out as a null pointer. */
static char* g_data_dir;
    248  1.1  christos 
/**
 * Creates `indir` and every missing parent directory, like `mkdir -p`.
 * New directories are created with mode S_IRWXU.
 *
 * @return 0 on success, EINVAL for a NULL or empty path, ENOMEM when the
 *         path cannot be copied, otherwise the errno left by mkdir().
 */
static int ensure_directory_exists(char const* indir) {
    /* The scan below starts at the second character, so an empty path would
     * make it step past the string terminator. */
    if (indir == NULL || indir[0] == '\0')
        return EINVAL;

    char* const dir = strdup(indir);
    if (dir == NULL)
        return ENOMEM;

    int ret = 0;
    char* end = dir;
    do {
        /* Find the next directory level. */
        for (++end; *end != '\0' && *end != '/'; ++end)
            ;
        /* End the string there, make the directory, and restore the string. */
        char const save = *end;
        *end = '\0';
        int const isdir = UTIL_isDirectory(dir);
        int const failed = mkdir(dir, S_IRWXU) != 0;
        /* Capture errno before anything else has a chance to change it. */
        int const mkdir_errno = errno;
        *end = save;
        /* It's okay if the directory already exists. */
        if (failed && !(mkdir_errno == EEXIST && isdir)) {
            ret = mkdir_errno;
            fprintf(stderr, "mkdir() failed\n");
            break;
        }
    } while (*end != '\0');

    free(dir);
    return ret;
}
    281  1.1  christos 
/**
 * Joins str1, str2 and (when non-NULL) str3 into one freshly malloc'd,
 * NUL-terminated string. Returns NULL when the allocation fails.
 * The caller frees the result.
 */
static char* cat3(char const* str1, char const* str2, char const* str3) {
    size_t const len1 = strlen(str1);
    size_t const len2 = strlen(str2);
    size_t const len3 = (str3 != NULL) ? strlen(str3) : 0;
    size_t const total = len1 + len2 + len3;

    char* const out = (char*)malloc(total + 1);
    if (out == NULL)
        return NULL;

    char* cursor = out;
    memcpy(cursor, str1, len1);
    cursor += len1;
    memcpy(cursor, str2, len2);
    cursor += len2;
    /* Only touch str3 when there is something to copy; it may be NULL. */
    if (len3 != 0)
        memcpy(cursor, str3, len3);
    out[total] = '\0';

    assert(strlen(out) == total);
    return out;
}
    298  1.1  christos 
/** Joins two strings into a new malloc'd buffer by calling cat3(). */
static char* cat2(char const* str1, char const* str2) {
    char const* const no_third = NULL;
    return cat3(str1, str2, no_third);
}
    302  1.1  christos 
    303  1.1  christos /**
    304  1.1  christos  * State needed by the curl callback.
    305  1.1  christos  * It takes data from curl, hashes it, and writes it to the file.
    306  1.1  christos  */
    307  1.1  christos typedef struct {
    308  1.1  christos     FILE* file;
    309  1.1  christos     XXH64_state_t xxhash64;
    310  1.1  christos     int error;
    311  1.1  christos } curl_data_t;
    312  1.1  christos 
    313  1.1  christos /** Create the curl state. */
    314  1.1  christos static curl_data_t curl_data_create(
    315  1.1  christos     data_resource_t const* resource,
    316  1.1  christos     data_type_t type) {
    317  1.1  christos     curl_data_t cdata = {};
    318  1.1  christos 
    319  1.1  christos     XXH64_reset(&cdata.xxhash64, 0);
    320  1.1  christos 
    321  1.1  christos     assert(UTIL_isDirectory(g_data_dir));
    322  1.1  christos 
    323  1.1  christos     if (type == data_type_file) {
    324  1.1  christos         /* Decompress the resource and store to the path. */
    325  1.1  christos         char* cmd = cat3("zstd -dqfo '", resource->path, "'");
    326  1.1  christos         if (cmd == NULL) {
    327  1.1  christos             cdata.error = ENOMEM;
    328  1.1  christos             return cdata;
    329  1.1  christos         }
    330  1.1  christos         cdata.file = popen(cmd, "w");
    331  1.1  christos         free(cmd);
    332  1.1  christos     } else {
    333  1.1  christos         /* Decompress and extract the resource to the cache directory. */
    334  1.1  christos         char* cmd = cat3("zstd -dc | tar -x -C '", g_data_dir, "'");
    335  1.1  christos         if (cmd == NULL) {
    336  1.1  christos             cdata.error = ENOMEM;
    337  1.1  christos             return cdata;
    338  1.1  christos         }
    339  1.1  christos         cdata.file = popen(cmd, "w");
    340  1.1  christos         free(cmd);
    341  1.1  christos     }
    342  1.1  christos     if (cdata.file == NULL) {
    343  1.1  christos         cdata.error = errno;
    344  1.1  christos     }
    345  1.1  christos 
    346  1.1  christos     return cdata;
    347  1.1  christos }
    348  1.1  christos 
    349  1.1  christos /** Free the curl state. */
    350  1.1  christos static int curl_data_free(curl_data_t cdata) {
    351  1.1  christos     return pclose(cdata.file);
    352  1.1  christos }
    353  1.1  christos 
    354  1.1  christos /** curl callback. Updates the hash, and writes to the file. */
    355  1.1  christos static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
    356  1.1  christos     curl_data_t* cdata = (curl_data_t*)ptr;
    357  1.1  christos     size_t const written = fwrite(data, size, count, cdata->file);
    358  1.1  christos     XXH64_update(&cdata->xxhash64, data, written * size);
    359  1.1  christos     return written;
    360  1.1  christos }
    361  1.1  christos 
    362  1.1  christos static int curl_download_resource(
    363  1.1  christos     CURL* curl,
    364  1.1  christos     data_resource_t const* resource,
    365  1.1  christos     data_type_t type) {
    366  1.1  christos     curl_data_t cdata;
    367  1.1  christos     /* Download the data. */
    368  1.1  christos     if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0)
    369  1.1  christos         return EINVAL;
    370  1.1  christos     if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
    371  1.1  christos         return EINVAL;
    372  1.1  christos     cdata = curl_data_create(resource, type);
    373  1.1  christos     if (cdata.error != 0)
    374  1.1  christos         return cdata.error;
    375  1.1  christos     int const curl_err = curl_easy_perform(curl);
    376  1.1  christos     int const close_err = curl_data_free(cdata);
    377  1.1  christos     if (curl_err) {
    378  1.1  christos         fprintf(
    379  1.1  christos             stderr,
    380  1.1  christos             "downloading '%s' for '%s' failed\n",
    381  1.1  christos             resource->url,
    382  1.1  christos             resource->path);
    383  1.1  christos         return EIO;
    384  1.1  christos     }
    385  1.1  christos     if (close_err) {
    386  1.1  christos         fprintf(stderr, "writing data to '%s' failed\n", resource->path);
    387  1.1  christos         return EIO;
    388  1.1  christos     }
    389  1.1  christos     /* check that the file exists. */
    390  1.1  christos     if (type == data_type_file && !UTIL_isRegularFile(resource->path)) {
    391  1.1  christos         fprintf(stderr, "output file '%s' does not exist\n", resource->path);
    392  1.1  christos         return EIO;
    393  1.1  christos     }
    394  1.1  christos     if (type == data_type_dir && !UTIL_isDirectory(resource->path)) {
    395  1.1  christos         fprintf(
    396  1.1  christos             stderr, "output directory '%s' does not exist\n", resource->path);
    397  1.1  christos         return EIO;
    398  1.1  christos     }
    399  1.1  christos     /* Check that the hash matches. */
    400  1.1  christos     if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) {
    401  1.1  christos         fprintf(
    402  1.1  christos             stderr,
    403  1.1  christos             "checksum does not match: 0x%llxLL != 0x%llxLL\n",
    404  1.1  christos             (unsigned long long)XXH64_digest(&cdata.xxhash64),
    405  1.1  christos             (unsigned long long)resource->xxhash64);
    406  1.1  christos         return EINVAL;
    407  1.1  christos     }
    408  1.1  christos 
    409  1.1  christos     return 0;
    410  1.1  christos }
    411  1.1  christos 
    412  1.1  christos /** Download a single data object. */
    413  1.1  christos static int curl_download_datum(CURL* curl, data_t const* data) {
    414  1.1  christos     int ret;
    415  1.1  christos     ret = curl_download_resource(curl, &data->data, data->type);
    416  1.1  christos     if (ret != 0)
    417  1.1  christos         return ret;
    418  1.1  christos     if (data_has_dict(data)) {
    419  1.1  christos         ret = curl_download_resource(curl, &data->dict, data_type_file);
    420  1.1  christos         if (ret != 0)
    421  1.1  christos             return ret;
    422  1.1  christos     }
    423  1.1  christos     return ret;
    424  1.1  christos }
    425  1.1  christos 
    426  1.1  christos /** Download all the data. */
    427  1.1  christos static int curl_download_data(data_t const* const* data) {
    428  1.1  christos     if (curl_global_init(CURL_GLOBAL_ALL) != 0)
    429  1.1  christos         return EFAULT;
    430  1.1  christos 
    431  1.1  christos     curl_data_t cdata = {};
    432  1.1  christos     CURL* curl = curl_easy_init();
    433  1.1  christos     int err = EFAULT;
    434  1.1  christos 
    435  1.1  christos     if (curl == NULL)
    436  1.1  christos         return EFAULT;
    437  1.1  christos 
    438  1.1  christos     if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0)
    439  1.1  christos         goto out;
    440  1.1  christos     if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0)
    441  1.1  christos         goto out;
    442  1.1  christos     if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0)
    443  1.1  christos         goto out;
    444  1.1  christos 
    445  1.1  christos     assert(data != NULL);
    446  1.1  christos     for (; *data != NULL; ++data) {
    447  1.1  christos         if (curl_download_datum(curl, *data) != 0)
    448  1.1  christos             goto out;
    449  1.1  christos     }
    450  1.1  christos 
    451  1.1  christos     err = 0;
    452  1.1  christos out:
    453  1.1  christos     curl_easy_cleanup(curl);
    454  1.1  christos     curl_global_cleanup();
    455  1.1  christos     return err;
    456  1.1  christos }
    457  1.1  christos 
    458  1.1  christos /** Fill the path member variable of the data objects. */
    459  1.1  christos static int data_create_paths(data_t* const* data, char const* dir) {
    460  1.1  christos     size_t const dirlen = strlen(dir);
    461  1.1  christos     assert(data != NULL);
    462  1.1  christos     for (; *data != NULL; ++data) {
    463  1.1  christos         data_t* const datum = *data;
    464  1.1  christos         datum->data.path = cat3(dir, "/", datum->name);
    465  1.1  christos         if (datum->data.path == NULL)
    466  1.1  christos             return ENOMEM;
    467  1.1  christos         if (data_has_dict(datum)) {
    468  1.1  christos             datum->dict.path = cat2(datum->data.path, ".dict");
    469  1.1  christos             if (datum->dict.path == NULL)
    470  1.1  christos                 return ENOMEM;
    471  1.1  christos         }
    472  1.1  christos     }
    473  1.1  christos     return 0;
    474  1.1  christos }
    475  1.1  christos 
    476  1.1  christos /** Free the path member variable of the data objects. */
    477  1.1  christos static void data_free_paths(data_t* const* data) {
    478  1.1  christos     assert(data != NULL);
    479  1.1  christos     for (; *data != NULL; ++data) {
    480  1.1  christos         data_t* datum = *data;
    481  1.1  christos         free((void*)datum->data.path);
    482  1.1  christos         free((void*)datum->dict.path);
    483  1.1  christos         datum->data.path = NULL;
    484  1.1  christos         datum->dict.path = NULL;
    485  1.1  christos     }
    486  1.1  christos }
    487  1.1  christos 
/* File name of the stamp stored inside the cache directory. */
static const char kStampName[] = "STAMP";
    489  1.1  christos 
    490  1.1  christos static void xxh_update_le(XXH64_state_t* state, uint64_t data) {
    491  1.1  christos     if (!MEM_isLittleEndian())
    492  1.1  christos         data = MEM_swap64(data);
    493  1.1  christos     XXH64_update(state, &data, sizeof(data));
    494  1.1  christos }
    495  1.1  christos 
    496  1.1  christos /** Hash the data to create the stamp. */
    497  1.1  christos static uint64_t stamp_hash(data_t const* const* data) {
    498  1.1  christos     XXH64_state_t state;
    499  1.1  christos 
    500  1.1  christos     XXH64_reset(&state, 0);
    501  1.1  christos     assert(data != NULL);
    502  1.1  christos     for (; *data != NULL; ++data) {
    503  1.1  christos         data_t const* datum = *data;
    504  1.1  christos         /* We don't care about the URL that we fetch from. */
    505  1.1  christos         /* The path is derived from the name. */
    506  1.1  christos         XXH64_update(&state, datum->name, strlen(datum->name));
    507  1.1  christos         xxh_update_le(&state, datum->data.xxhash64);
    508  1.1  christos         xxh_update_le(&state, datum->dict.xxhash64);
    509  1.1  christos         xxh_update_le(&state, datum->type);
    510  1.1  christos     }
    511  1.1  christos     return XXH64_digest(&state);
    512  1.1  christos }
    513  1.1  christos 
    514  1.1  christos /** Check if the stamp matches the stamp in the cache directory. */
    515  1.1  christos static int stamp_check(char const* dir, data_t const* const* data) {
    516  1.1  christos     char* stamp = cat3(dir, "/", kStampName);
    517  1.1  christos     uint64_t const expected = stamp_hash(data);
    518  1.1  christos     XXH64_canonical_t actual;
    519  1.1  christos     FILE* stampfile = NULL;
    520  1.1  christos     int matches = 0;
    521  1.1  christos 
    522  1.1  christos     if (stamp == NULL)
    523  1.1  christos         goto out;
    524  1.1  christos     if (!UTIL_isRegularFile(stamp)) {
    525  1.1  christos         fprintf(stderr, "stamp does not exist: recreating the data cache\n");
    526  1.1  christos         goto out;
    527  1.1  christos     }
    528  1.1  christos 
    529  1.1  christos     stampfile = fopen(stamp, "rb");
    530  1.1  christos     if (stampfile == NULL) {
    531  1.1  christos         fprintf(stderr, "could not open stamp: recreating the data cache\n");
    532  1.1  christos         goto out;
    533  1.1  christos     }
    534  1.1  christos 
    535  1.1  christos     size_t b;
    536  1.1  christos     if ((b = fread(&actual, sizeof(actual), 1, stampfile)) != 1) {
    537  1.1  christos         fprintf(stderr, "invalid stamp: recreating the data cache\n");
    538  1.1  christos         goto out;
    539  1.1  christos     }
    540  1.1  christos 
    541  1.1  christos     matches = (expected == XXH64_hashFromCanonical(&actual));
    542  1.1  christos     if (matches)
    543  1.1  christos         fprintf(stderr, "stamp matches: reusing the cached data\n");
    544  1.1  christos     else
    545  1.1  christos         fprintf(stderr, "stamp does not match: recreating the data cache\n");
    546  1.1  christos 
    547  1.1  christos out:
    548  1.1  christos     free(stamp);
    549  1.1  christos     if (stampfile != NULL)
    550  1.1  christos         fclose(stampfile);
    551  1.1  christos     return matches;
    552  1.1  christos }
    553  1.1  christos 
    554  1.1  christos /** On success write a new stamp, on failure delete the old stamp. */
    555  1.1  christos static int
    556  1.1  christos stamp_write(char const* dir, data_t const* const* data, int const data_err) {
    557  1.1  christos     char* stamp = cat3(dir, "/", kStampName);
    558  1.1  christos     FILE* stampfile = NULL;
    559  1.1  christos     int err = EIO;
    560  1.1  christos 
    561  1.1  christos     if (stamp == NULL)
    562  1.1  christos         return ENOMEM;
    563  1.1  christos 
    564  1.1  christos     if (data_err != 0) {
    565  1.1  christos         err = data_err;
    566  1.1  christos         goto out;
    567  1.1  christos     }
    568  1.1  christos     XXH64_canonical_t hash;
    569  1.1  christos 
    570  1.1  christos     XXH64_canonicalFromHash(&hash, stamp_hash(data));
    571  1.1  christos 
    572  1.1  christos     stampfile = fopen(stamp, "wb");
    573  1.1  christos     if (stampfile == NULL)
    574  1.1  christos         goto out;
    575  1.1  christos     if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1)
    576  1.1  christos         goto out;
    577  1.1  christos     err = 0;
    578  1.1  christos     fprintf(stderr, "stamped new data cache\n");
    579  1.1  christos out:
    580  1.1  christos     if (err != 0)
    581  1.1  christos         /* Ignore errors. */
    582  1.1  christos         unlink(stamp);
    583  1.1  christos     free(stamp);
    584  1.1  christos     if (stampfile != NULL)
    585  1.1  christos         fclose(stampfile);
    586  1.1  christos     return err;
    587  1.1  christos }
    588  1.1  christos 
    589  1.1  christos int data_init(char const* dir) {
    590  1.1  christos     int err;
    591  1.1  christos 
    592  1.1  christos     if (dir == NULL)
    593  1.1  christos         return EINVAL;
    594  1.1  christos 
    595  1.1  christos     /* This must be first to simplify logic. */
    596  1.1  christos     err = ensure_directory_exists(dir);
    597  1.1  christos     if (err != 0)
    598  1.1  christos         return err;
    599  1.1  christos 
    600  1.1  christos     /* Save the cache directory. */
    601  1.1  christos     g_data_dir = strdup(dir);
    602  1.1  christos     if (g_data_dir == NULL)
    603  1.1  christos         return ENOMEM;
    604  1.1  christos 
    605  1.1  christos     err = data_create_paths(g_data, dir);
    606  1.1  christos     if (err != 0)
    607  1.1  christos         return err;
    608  1.1  christos 
    609  1.1  christos     /* If the stamp matches then we are good to go.
    610  1.1  christos      * This must be called before any modifications to the data cache.
    611  1.1  christos      * After this point, we MUST call stamp_write() to update the STAMP,
    612  1.1  christos      * since we've updated the data cache.
    613  1.1  christos      */
    614  1.1  christos     if (stamp_check(dir, data))
    615  1.1  christos         return 0;
    616  1.1  christos 
    617  1.1  christos     err = curl_download_data(data);
    618  1.1  christos     if (err != 0)
    619  1.1  christos         goto out;
    620  1.1  christos 
    621  1.1  christos out:
    622  1.1  christos     /* This must be last, since it must know if data_init() succeeded. */
    623  1.1  christos     stamp_write(dir, data, err);
    624  1.1  christos     return err;
    625  1.1  christos }
    626  1.1  christos 
    627  1.1  christos void data_finish(void) {
    628  1.1  christos     data_free_paths(g_data);
    629  1.1  christos     free(g_data_dir);
    630  1.1  christos     g_data_dir = NULL;
    631  1.1  christos }
    632