Home | History | Annotate | Line # | Download | only in pzstd
Options.cpp revision 1.1
      1 /*
      2  * Copyright (c) Meta Platforms, Inc. and affiliates.
      3  * All rights reserved.
      4  *
      5  * This source code is licensed under both the BSD-style license (found in the
      6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
      7  * in the COPYING file in the root directory of this source tree).
      8  */
      9 #include "Options.h"
     10 #include "util.h"
     11 #include "utils/ScopeGuard.h"
     12 
     13 #include <algorithm>
     14 #include <cassert>
     15 #include <cstdio>
     16 #include <cstring>
     17 #include <iterator>
     18 #include <thread>
     19 #include <vector>
     20 
     21 
     22 namespace pzstd {
     23 
     24 namespace {
// Picks the thread count used when the user passes no -p option.
// A build-time PZSTD_NUM_THREADS definition takes precedence; otherwise the
// value reported by std::thread::hardware_concurrency() is used as-is.
unsigned defaultNumThreads() {
#ifndef PZSTD_NUM_THREADS
  const unsigned detected = std::thread::hardware_concurrency();
  return detected;
#else
  return PZSTD_NUM_THREADS;
#endif
}
     32 
// Parses the run of decimal digits at the start of `*arg`.
//
// On return `*arg` points at the first character that is not a digit, so the
// caller can check what (if anything) follows the number. If `*arg` does not
// start with a digit, 0 is returned and `*arg` is left untouched.
//
// Values too large for `unsigned` saturate at the maximum `unsigned` value
// instead of wrapping around, so an absurdly long number can never alias a
// small, valid-looking one. All digits are still consumed in that case.
unsigned parseUnsigned(const char **arg) {
  constexpr unsigned kMaxValue = static_cast<unsigned>(-1);
  unsigned result = 0;
  bool overflowed = false;
  while (**arg >= '0' && **arg <= '9') {
    const unsigned digit = static_cast<unsigned>(**arg - '0');
    // result * 10 + digit fits iff result <= (kMaxValue - digit) / 10.
    if (result > (kMaxValue - digit) / 10) {
      overflowed = true;
    } else {
      result = result * 10 + digit;
    }
    ++(*arg);
  }
  return overflowed ? kMaxValue : result;
}
     42 
// Fetches the argument of a short option.
//
// `options` points at the option letter. If more characters follow it in the
// same word (e.g. "-p4"), those characters are the argument. Otherwise `i` is
// advanced and the next element of `argv` is returned. When there is no next
// element, an error is printed to stderr and nullptr is returned (with `i`
// left equal to `argc`).
const char *getArgument(const char *options, const char **argv, int &i,
                        int argc) {
  const bool attachedToOption = options[1] != 0;
  if (!attachedToOption) {
    ++i;
    if (i != argc) {
      return argv[i];
    }
    std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
                 *options);
    return nullptr;
  }
  return options + 1;
}
     56 
// Suffix added to the input name when compressing and stripped from it when
// decompressing (see Options::getOutputFile).
const std::string kZstdExtension{".zst"};
// "-" on the command line stands for standard input / standard output.
constexpr char kStdIn[] = "-";
constexpr char kStdOut[] = "-";
// Level used when none is given, and the highest level accepted without
// --ultra.
constexpr unsigned kDefaultCompressionLevel = 3;
constexpr unsigned kMaxNonUltraCompressionLevel = 19;

// Output path used in test mode (-t), where the decompressed data is not kept.
const char nullOutput[] =
#ifdef _WIN32
    "nul";
#else
    "/dev/null";
#endif
     68 
// Tells the user, on stderr, that `option` refers to a feature this tool
// does not implement. `option` is printed verbatim.
void notSupported(const char *option) {
  std::FILE *const errorStream = stderr;
  std::fprintf(errorStream, "Operation not supported: %s\n", option);
}
     72 
     73 void usage() {
     74   std::fprintf(stderr, "Usage:\n");
     75   std::fprintf(stderr, "  pzstd [args] [FILE(s)]\n");
     76   std::fprintf(stderr, "Parallel ZSTD options:\n");
     77   std::fprintf(stderr, "  -p, --processes   #    : number of threads to use for (de)compression (default:<numcpus>)\n");
     78 
     79   std::fprintf(stderr, "ZSTD options:\n");
     80   std::fprintf(stderr, "  -#                     : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel);
     81   std::fprintf(stderr, "  -d, --decompress       : decompression\n");
     82   std::fprintf(stderr, "  -o                file : result stored into `file` (only if 1 input file)\n");
     83   std::fprintf(stderr, "  -f, --force            : overwrite output without prompting, (de)compress links\n");
     84   std::fprintf(stderr, "      --rm               : remove source file(s) after successful (de)compression\n");
     85   std::fprintf(stderr, "  -k, --keep             : preserve source file(s) (default)\n");
     86   std::fprintf(stderr, "  -h, --help             : display help and exit\n");
     87   std::fprintf(stderr, "  -V, --version          : display version number and exit\n");
     88   std::fprintf(stderr, "  -v, --verbose          : verbose mode; specify multiple times to increase log level (default:2)\n");
     89   std::fprintf(stderr, "  -q, --quiet            : suppress warnings; specify twice to suppress errors too\n");
     90   std::fprintf(stderr, "  -c, --stdout           : write to standard output (even if it is the console)\n");
     91 #ifdef UTIL_HAS_CREATEFILELIST
     92   std::fprintf(stderr, "  -r                     : operate recursively on directories\n");
     93 #endif
     94   std::fprintf(stderr, "      --ultra            : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel());
     95   std::fprintf(stderr, "  -C, --check            : integrity check (default)\n");
     96   std::fprintf(stderr, "      --no-check         : no integrity check\n");
     97   std::fprintf(stderr, "  -t, --test             : test compressed file integrity\n");
     98   std::fprintf(stderr, "  --                     : all arguments after \"--\" are treated as files\n");
     99 }
    100 } // anonymous namespace
    101 
    102 Options::Options()
    103     : numThreads(defaultNumThreads()), maxWindowLog(23),
    104       compressionLevel(kDefaultCompressionLevel), decompress(false),
    105       overwrite(false), keepSource(true), writeMode(WriteMode::Auto),
    106       checksum(true), verbosity(2) {}
    107 
    108 Options::Status Options::parse(int argc, const char **argv) {
    109   bool test = false;
    110   bool recursive = false;
    111   bool ultra = false;
    112   bool forceStdout = false;
    113   bool followLinks = false;
    114   // Local copy of input files, which are pointers into argv.
    115   std::vector<const char *> localInputFiles;
    116   for (int i = 1; i < argc; ++i) {
    117     const char *arg = argv[i];
    118     // Protect against empty arguments
    119     if (arg[0] == 0) {
    120       continue;
    121     }
    122     // Everything after "--" is an input file
    123     if (!std::strcmp(arg, "--")) {
    124       ++i;
    125       std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles));
    126       break;
    127     }
    128     // Long arguments that don't have a short option
    129     {
    130       bool isLongOption = true;
    131       if (!std::strcmp(arg, "--rm")) {
    132         keepSource = false;
    133       } else if (!std::strcmp(arg, "--ultra")) {
    134         ultra = true;
    135         maxWindowLog = 0;
    136       } else if (!std::strcmp(arg, "--no-check")) {
    137         checksum = false;
    138       } else if (!std::strcmp(arg, "--sparse")) {
    139         writeMode = WriteMode::Sparse;
    140         notSupported("Sparse mode");
    141         return Status::Failure;
    142       } else if (!std::strcmp(arg, "--no-sparse")) {
    143         writeMode = WriteMode::Regular;
    144         notSupported("Sparse mode");
    145         return Status::Failure;
    146       } else if (!std::strcmp(arg, "--dictID")) {
    147         notSupported(arg);
    148         return Status::Failure;
    149       } else if (!std::strcmp(arg, "--no-dictID")) {
    150         notSupported(arg);
    151         return Status::Failure;
    152       } else {
    153         isLongOption = false;
    154       }
    155       if (isLongOption) {
    156         continue;
    157       }
    158     }
    159     // Arguments with a short option simply set their short option.
    160     const char *options = nullptr;
    161     if (!std::strcmp(arg, "--processes")) {
    162       options = "p";
    163     } else if (!std::strcmp(arg, "--version")) {
    164       options = "V";
    165     } else if (!std::strcmp(arg, "--help")) {
    166       options = "h";
    167     } else if (!std::strcmp(arg, "--decompress")) {
    168       options = "d";
    169     } else if (!std::strcmp(arg, "--force")) {
    170       options = "f";
    171     } else if (!std::strcmp(arg, "--stdout")) {
    172       options = "c";
    173     } else if (!std::strcmp(arg, "--keep")) {
    174       options = "k";
    175     } else if (!std::strcmp(arg, "--verbose")) {
    176       options = "v";
    177     } else if (!std::strcmp(arg, "--quiet")) {
    178       options = "q";
    179     } else if (!std::strcmp(arg, "--check")) {
    180       options = "C";
    181     } else if (!std::strcmp(arg, "--test")) {
    182       options = "t";
    183     } else if (arg[0] == '-' && arg[1] != 0) {
    184       options = arg + 1;
    185     } else {
    186       localInputFiles.emplace_back(arg);
    187       continue;
    188     }
    189     assert(options != nullptr);
    190 
    191     bool finished = false;
    192     while (!finished && *options != 0) {
    193       // Parse the compression level
    194       if (*options >= '0' && *options <= '9') {
    195         compressionLevel = parseUnsigned(&options);
    196         continue;
    197       }
    198 
    199       switch (*options) {
    200       case 'h':
    201       case 'H':
    202         usage();
    203         return Status::Message;
    204       case 'V':
    205         std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING);
    206         return Status::Message;
    207       case 'p': {
    208         finished = true;
    209         const char *optionArgument = getArgument(options, argv, i, argc);
    210         if (optionArgument == nullptr) {
    211           return Status::Failure;
    212         }
    213         if (*optionArgument < '0' || *optionArgument > '9') {
    214           std::fprintf(stderr, "Option -p expects a number, but %s provided\n",
    215                        optionArgument);
    216           return Status::Failure;
    217         }
    218         numThreads = parseUnsigned(&optionArgument);
    219         if (*optionArgument != 0) {
    220           std::fprintf(stderr,
    221                        "Option -p expects a number, but %u%s provided\n",
    222                        numThreads, optionArgument);
    223           return Status::Failure;
    224         }
    225         break;
    226       }
    227       case 'o': {
    228         finished = true;
    229         const char *optionArgument = getArgument(options, argv, i, argc);
    230         if (optionArgument == nullptr) {
    231           return Status::Failure;
    232         }
    233         outputFile = optionArgument;
    234         break;
    235       }
    236       case 'C':
    237         checksum = true;
    238         break;
    239       case 'k':
    240         keepSource = true;
    241         break;
    242       case 'd':
    243         decompress = true;
    244         break;
    245       case 'f':
    246         overwrite = true;
    247         forceStdout = true;
    248         followLinks = true;
    249         break;
    250       case 't':
    251         test = true;
    252         decompress = true;
    253         break;
    254 #ifdef UTIL_HAS_CREATEFILELIST
    255       case 'r':
    256         recursive = true;
    257         break;
    258 #endif
    259       case 'c':
    260         outputFile = kStdOut;
    261         forceStdout = true;
    262         break;
    263       case 'v':
    264         ++verbosity;
    265         break;
    266       case 'q':
    267         --verbosity;
    268         // Ignore them for now
    269         break;
    270       // Unsupported options from Zstd
    271       case 'D':
    272       case 's':
    273         notSupported("Zstd dictionaries.");
    274         return Status::Failure;
    275       case 'b':
    276       case 'e':
    277       case 'i':
    278       case 'B':
    279         notSupported("Zstd benchmarking options.");
    280         return Status::Failure;
    281       default:
    282         std::fprintf(stderr, "Invalid argument: %s\n", arg);
    283         return Status::Failure;
    284       }
    285       if (!finished) {
    286         ++options;
    287       }
    288     } // while (*options != 0);
    289   }   // for (int i = 1; i < argc; ++i);
    290 
    291   // Set options for test mode
    292   if (test) {
    293     outputFile = nullOutput;
    294     keepSource = true;
    295   }
    296 
    297   // Input file defaults to standard input if not provided.
    298   if (localInputFiles.empty()) {
    299     localInputFiles.emplace_back(kStdIn);
    300   }
    301 
    302   // Check validity of input files
    303   if (localInputFiles.size() > 1) {
    304     const auto it = std::find(localInputFiles.begin(), localInputFiles.end(),
    305                               std::string{kStdIn});
    306     if (it != localInputFiles.end()) {
    307       std::fprintf(
    308           stderr,
    309           "Cannot specify standard input when handling multiple files\n");
    310       return Status::Failure;
    311     }
    312   }
    313   if (localInputFiles.size() > 1 || recursive) {
    314     if (!outputFile.empty() && outputFile != nullOutput) {
    315       std::fprintf(
    316           stderr,
    317           "Cannot specify an output file when handling multiple inputs\n");
    318       return Status::Failure;
    319     }
    320   }
    321 
    322   g_utilDisplayLevel = verbosity;
    323   // Remove local input files that are symbolic links
    324   if (!followLinks) {
    325       std::remove_if(localInputFiles.begin(), localInputFiles.end(),
    326                      [&](const char *path) {
    327                         bool isLink = UTIL_isLink(path);
    328                         if (isLink && verbosity >= 2) {
    329                             std::fprintf(
    330                                     stderr,
    331                                     "Warning : %s is symbolic link, ignoring\n",
    332                                     path);
    333                         }
    334                         return isLink;
    335                     });
    336   }
    337 
    338   // Translate input files/directories into files to (de)compress
    339   if (recursive) {
    340     FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
    341     if (files == nullptr) {
    342       std::fprintf(stderr, "Error traversing directories\n");
    343       return Status::Failure;
    344     }
    345     auto guard =
    346         makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
    347     if (files->tableSize == 0) {
    348       std::fprintf(stderr, "No files found\n");
    349       return Status::Failure;
    350     }
    351     inputFiles.resize(files->tableSize);
    352     std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
    353   } else {
    354     inputFiles.resize(localInputFiles.size());
    355     std::copy(localInputFiles.begin(), localInputFiles.end(),
    356               inputFiles.begin());
    357   }
    358   localInputFiles.clear();
    359   assert(!inputFiles.empty());
    360 
    361   // If reading from standard input, default to standard output
    362   if (inputFiles[0] == kStdIn && outputFile.empty()) {
    363     assert(inputFiles.size() == 1);
    364     outputFile = "-";
    365   }
    366 
    367   if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) {
    368     assert(inputFiles.size() == 1);
    369     std::fprintf(stderr, "Cannot read input from interactive console\n");
    370     return Status::Failure;
    371   }
    372   if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) {
    373     std::fprintf(stderr, "Will not write to console stdout unless -c or -f is "
    374                          "specified and decompressing\n");
    375     return Status::Failure;
    376   }
    377 
    378   // Check compression level
    379   {
    380     unsigned maxCLevel =
    381         ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel;
    382     if (compressionLevel > maxCLevel || compressionLevel == 0) {
    383       std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel);
    384       return Status::Failure;
    385     }
    386   }
    387 
    388   // Check that numThreads is set
    389   if (numThreads == 0) {
    390     std::fprintf(stderr, "Invalid arguments: # of threads not specified "
    391                          "and unable to determine hardware concurrency.\n");
    392     return Status::Failure;
    393   }
    394 
    395   // Modify verbosity
    396   // If we are piping input and output, turn off interaction
    397   if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) {
    398     verbosity = 1;
    399   }
    400   // If we are in multi-file mode, turn off interaction
    401   if (inputFiles.size() > 1 && verbosity == 2) {
    402     verbosity = 1;
    403   }
    404 
    405   return Status::Success;
    406 }
    407 
    408 std::string Options::getOutputFile(const std::string &inputFile) const {
    409   if (!outputFile.empty()) {
    410     return outputFile;
    411   }
    412   // Attempt to add/remove zstd extension from the input file
    413   if (decompress) {
    414     int stemSize = inputFile.size() - kZstdExtension.size();
    415     if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) {
    416       return inputFile.substr(0, stemSize);
    417     } else {
    418       return "";
    419     }
    420   } else {
    421     return inputFile + kZstdExtension;
    422   }
    423 }
    424 }
    425