Home | History | Annotate | Line # | Download | only in tests
      1 #!/usr/bin/env python3
      2 """Test zstd interoperability between versions"""
      3 
      4 # ################################################################
      5 # Copyright (c) Meta Platforms, Inc. and affiliates.
      6 # All rights reserved.
      7 #
      8 # This source code is licensed under both the BSD-style license (found in the
      9 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
     10 # in the COPYING file in the root directory of this source tree).
     11 # You may select, at your option, one of the above-listed licenses.
     12 # ################################################################
     13 
     14 import filecmp
     15 import glob
     16 import hashlib
     17 import os
     18 import shutil
     19 import sys
     20 import subprocess
     21 from subprocess import Popen, PIPE
     22 
# --- external tools -------------------------------------------------------
git_cmd = 'git'
make_cmd = 'make'
make_args = ['-j', 'CFLAGS=-O0']

# --- repository and working-directory layout ------------------------------
repo_url = 'https://github.com/facebook/zstd.git'
tmp_dir_name = 'tests/versionsTest'

# --- test payload ---------------------------------------------------------
test_dat_src = 'README.md'   # file copied to serve as the sample
test_dat = 'test_dat'        # name of that copy inside the working directory

# Pseudo-tag used for the binary built from the current working tree.
head = 'vdevel'

# --- dictionary training corpus -------------------------------------------
dict_source = 'dict_source'
# Glob patterns selecting the files gathered into the dictionary training
# directory.
dict_globs = [
    # C sources
    'programs/*.c',
    'lib/common/*.c',
    'lib/compress/*.c',
    'lib/decompress/*.c',
    'lib/dictBuilder/*.c',
    'lib/legacy/*.c',
    # headers
    'programs/*.h',
    'lib/common/*.h',
    'lib/compress/*.h',
    'lib/dictBuilder/*.h',
    'lib/legacy/*.h',
]
     45 
     46 
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run ``command`` to completion and return its exit status.

    Both stdout and stderr of the child are captured.

    Args:
        command: argument list, or a single string when ``param_shell`` is
            true.
        print_output: when true, always print the captured stdout and stderr.
        print_error: when true (and ``print_output`` is false), print the
            captured stderr if the command exits with a non-zero status.
        param_shell: forwarded to ``Popen`` as ``shell``.

    Returns:
        The exit status of the child process.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    raw_stdout, raw_stderr = popen.communicate()
    # The output is only printed for diagnostics, so undecodable bytes are
    # replaced instead of aborting the whole run with UnicodeDecodeError.
    stderr_text = raw_stderr.decode("utf-8", errors="replace")
    stdout_text = raw_stdout.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode not in (None, 0):
        print(stderr_text)
    return popen.returncode
     59 
     60 
def proc(cmd_args, pipe=True, dummy=False):
    """Run ``cmd_args`` and wait for it to finish.

    Returns the ``(stdout, stderr)`` pair produced by ``communicate()``.
    With ``pipe`` true both streams are captured; otherwise the child
    inherits the parent's streams. With ``dummy`` true nothing is run and
    ``None`` is returned.
    """
    if dummy:
        return
    stream_options = {'stdout': PIPE, 'stderr': PIPE} if pipe else {}
    return Popen(cmd_args, **stream_options).communicate()
     69 
     70 
def make(targets, pipe=True):
    """Invoke make with the module-wide arguments followed by ``targets``.

    The full command is echoed first. Returns whatever ``proc`` returns.
    """
    command = [make_cmd] + make_args + targets
    print('compilation command : {}'.format(command))
    return proc(command, pipe)
     76 
     77 
def git(args, pipe=True):
    """Run the git executable with ``args`` and return the result of ``proc``."""
    command = [git_cmd] + args
    return proc(command, pipe)
     80 
     81 
def get_git_tags():
    """Return the tags reported by git for the pattern ``v[0-9].[0-9].[0-9]``.

    Must be called with the current directory inside the git checkout.
    """
    listing, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return listing.decode('utf-8').split()
     86 
     87 
def dict_ok(tag, dict_name, sample):
    """Check that ``./zstd.<tag>`` can use the dictionary ``dict_name``.

    The binary is run with ``-D dict_name``, reading ``sample`` on stdin and
    discarding its output.

    Returns:
        True when the dictionary file exists and the command exits with
        status 0; False when the file is missing, the sample cannot be
        opened, the binary cannot be started, or it exits non-zero.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as i:
            subprocess.check_call(cmd, stdin=i, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Only real I/O or process failures mean "dictionary unusable";
        # KeyboardInterrupt and SystemExit are left to propagate.
        return False
    return True
     98 
     99 
def create_dict(tag, dict_source_path, fallback_tag=None):
    """Create ``dict.<tag>`` in the current directory unless it already exists.

    The dictionary is trained on every ``*.c`` and ``*.h`` file found directly
    in ``dict_source_path``. Tag ``v0.5.0`` uses the separate
    ``./dictBuilder.<tag>`` binary; every other tag uses
    ``./zstd.<tag> -f --train``.

    Args:
        tag: version tag naming both the binary and the dictionary.
        dict_source_path: directory holding the training files.
        fallback_tag: when given and training fails, ``dict.<fallback_tag>``
            is copied to ``dict.<tag>`` instead.

    Raises:
        RuntimeError: training failed and no ``fallback_tag`` was supplied.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    sources = glob.glob(dict_source_path + "/*.c")
    headers = glob.glob(dict_source_path + "/*.h")
    # Ensure the dictionary builder is deterministic
    files = sorted(sources + headers)

    if tag == 'v0.5.0':
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    try:
        result = execute(cmd, print_output=False)
    except OSError as err:
        # Without a shell a missing/unrunnable binary raises instead of
        # yielding a non-zero status; treat it as a failed build so the
        # fallback below still applies.
        print(err)
        result = 1

    if result == 0 and files and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
    121 
    122 
def zstd(tag, args, input_file, output_file):
    """
    Run ``./zstd.<tag>`` with ``args``, feeding input_file on stdin and
    writing stdout to output_file (used for both compression and
    decompression).
    Need this helper because 0.5.0 is broken when stdout is not a TTY.
    The command line and the captured stderr are printed.
    Throws subprocess.CalledProcessError if the command returns non-zero.
    """
    cmd = ['./zstd.' + tag] + args
    with open(input_file, "rb") as i, open(output_file, "wb") as o:
        print("Running: '{}', input={}, output={}" .format(
            ' '.join(cmd), input_file, output_file
        ))
        result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
        # stderr is only echoed for diagnostics: decode tolerantly so that
        # non-ASCII output cannot raise before the return code is checked.
        print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
        result.check_returncode()
    138 
    139 
def dict_compress_sample(tag, sample):
    """Compress ``sample`` with ``dict.<tag>`` at levels 1, 3, 5, 9, 15 and 18.

    Each result is written to ``<sample>_<LL>_64_<tag>_dictio.zst`` where
    ``LL`` is the zero-padded level. The level 1 run is the only one invoked
    with triple ``-v`` verbosity.
    """
    dict_name = 'dict.' + tag
    for level in (1, 3, 5, 9, 15, 18):
        options = ['-D', dict_name, '-{}'.format(level)]
        if level == 1:
            options = options + ['-v', '-v', '-v']
        target = '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag)
        zstd(tag, options, sample, target)
    print(tag + " : dict compression completed")
    152 
    153 
def compress_sample(tag, sample):
    """Compress ``sample`` without a dictionary at levels 1, 3, 5, 9, 15 and 18.

    Each result is written to ``<sample>_<LL>_64_<tag>_nodict.zst`` where
    ``LL`` is the zero-padded level.
    """
    for level in (1, 3, 5, 9, 15, 18):
        target = '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag)
        zstd(tag, ['-{}'.format(level)], sample, target)
    print(tag + " : compression completed")
    164 
    165 
    166 # https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the contents of ``filepath``."""
    digest = hashlib.sha1()
    with open(filepath, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()
    170 
    171 
def remove_duplicates():
    """Delete byte-identical ``*.zst`` files from the current directory.

    Files are visited in sorted order; for each group of identical files the
    first name is kept and every later one is removed and reported.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            # already removed as a duplicate of an earlier file
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a content comparison. The default shallow
            # mode treats files with the same size and mtime as equal, which
            # could delete a file that is not a duplicate.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))
    184 
    185 
def decompress_zst(tag):
    """Decompress every ``*_nodict.zst`` file with ``./zstd.<tag>`` and verify it.

    Each file is decompressed to ``<file>_d64_<tag>.dec`` and compared with
    the reference sample ``test_dat``.

    Raises:
        RuntimeError: a decompressed file differs from the sample.
        subprocess.CalledProcessError: the zstd binary exited non-zero.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # shallow=False: always compare contents, never just the stat
        # signature, since this comparison is the actual round-trip check.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK     ')
    197 
    198 
def decompress_dict(tag):
    """Decompress every ``*_dictio.zst`` file with ``./zstd.<tag>`` and verify it.

    The dictionary to use is derived from the file name: the tag of the
    version that produced the file is recovered and ``dict.<that tag>`` is
    passed with ``-D``. Output goes to ``<file>_d64_<tag>.dec`` and is
    compared with the reference sample ``test_dat``.

    Raises:
        RuntimeError: a decompressed file differs from the sample.
        subprocess.CalledProcessError: the zstd binary exited non-zero.
    """
    suffix = '_dictio.zst'
    for file_zst in sorted(glob.glob('*' + suffix)):
        dict_tag = file_zst[:-len(suffix)]  # remove "_dictio.zst"
        if head in dict_tag:  # find vdevel
            dict_tag = head
        else:
            # the producing tag starts at the last 'v' of the name
            dict_tag = dict_tag[dict_tag.rfind('v'):]
        # v0.6.0 is not asked to decode dictionary frames from older tags
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # shallow=False: always compare contents, never just the stat
        # signature, since this comparison is the actual round-trip check.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK     ')
    218 
    219 
if __name__ == '__main__':
    # All paths are derived from the parent of the current directory, which
    # is taken to be the root of the zstd checkout.
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    # plain string comparison; `head` ('vdevel') also passes this filter
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        # Release binaries are built once and reused; head is always rebuilt.
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print('-----------------------------------------------')
                print('compiling ' + tag)
                print('-----------------------------------------------')
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                # check the tagged sources out into r_dir
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)   # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print('-----------------------------------------------')
                print('compiling head')
                print('-----------------------------------------------')
                make(['zstd'], False)
            # the current directory is the programs directory that was just built
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            for src_file in glob.glob(dict_glob, root_dir=base_dir):
                src_file = os.path.join(base_dir, src_file)
                print("copying " + src_file + " to " + dict_source_path)
                shutil.copy(src_file, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    # The head dictionary is created first because it is the fallback for
    # every tag whose own dictionary creation fails.
    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        if tag >= 'v0.5.0':
            create_dict(tag, dict_source_path, head)
            dict_compress_sample(tag, test_dat)
            remove_duplicates()
            decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    # The loop variable is deliberately not called `zstd`: at module level
    # that would rebind the zstd() helper defined above.
    for zst_file in sorted(glob.glob('*.zst')):
        print(zst_file + ' : ' + repr(os.path.getsize(zst_file)) + ', ' + sha1_of_file(zst_file))
    308         print(zstd + ' : ' + repr(os.path.getsize(zstd)) + ', ' + sha1_of_file(zstd))
    309