Home | History | Annotate | Line # | Download | only in llvm_checksum
      1 #!/usr/bin/env python
      2 """ A small program to compute checksums of LLVM checkout.
      3 """
      4 from __future__ import absolute_import
      5 from __future__ import division
      6 from __future__ import print_function
      7 
      8 import hashlib
      9 import logging
     10 import re
     11 import sys
     12 from argparse import ArgumentParser
     13 from project_tree import *
     14 
# Matches an expanded svn keyword substitution: a "$", the keyword name
# (Date or LastChangedDate), one or more characters other than "$", and the
# closing "$". The keyword name is captured as group 1. Because at least one
# character is required after the name, an unexpanded keyword (the bare name
# between two "$") does not match.
SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")
     16 
     17 
def main():
  """Command-line entry point.

  Parses the command line, computes the checksums of the checkout at
  llvm_path and then either:
    * writes them to stdout and exits with status 0 (no --check given), or
    * validates them against the checksums read from the --check reference
      file, reporting the outcome on stdout and exiting with status 1 when
      they differ.
  """
  arg_parser = ArgumentParser()
  arg_parser.add_argument(
      "-v", "--verbose", action="store_true", help="enable debug logging")
  arg_parser.add_argument(
      "-c",
      "--check",
      metavar="reference_file",
      help=("read checksums from reference_file and "
            "check they match checksums of llvm_path."))
  arg_parser.add_argument(
      "--partial",
      action="store_true",
      help=("ignore projects from reference_file "
            "that are not checked out in llvm_path."))
  arg_parser.add_argument(
      "--multi_dir",
      action="store_true",
      help=("indicates llvm_path contains llvm, checked out "
            "into multiple directories, as opposed to a "
            "typical single source tree checkout."))
  arg_parser.add_argument("llvm_path")
  options = arg_parser.parse_args()

  # The reference file is read before anything is computed, so an unreadable
  # reference fails early.
  expected = None
  if options.check is not None:
    with open(options.check, "r") as reference_file:
      expected = ReadLLVMChecksums(reference_file)

  if options.verbose:
    logging.basicConfig(level=logging.DEBUG)

  # --multi_dir is the inverse of the flag CreateLLVMProjects takes.
  projects = CreateLLVMProjects(not options.multi_dir)
  actual = ComputeLLVMChecksums(options.llvm_path, projects)

  # No reference given: just print what was computed.
  if expected is None:
    WriteLLVMChecksums(actual, sys.stdout)
    sys.exit(0)

  if ValidateChecksums(expected, actual, options.partial):
    sys.stdout.write("Checksums match.")
    return

  # Mismatch: show both sets so they can be compared, then fail.
  sys.stdout.write("Checksums differ.\nNew checksums:\n")
  WriteLLVMChecksums(actual, sys.stdout)
  sys.stdout.write("Reference checksums:\n")
  WriteLLVMChecksums(expected, sys.stdout)
  sys.exit(1)
     66 
     67 
def ComputeLLVMChecksums(root_path, projects):
  """Compute checksums for LLVM sources checked out using svn.

  For each project whose directory exists under root_path, every file
  reported by WalkProjectFiles is hashed with SHA-256 (after expanded svn
  date keywords are collapsed). The project checksum is the SHA-256 over the
  sequence of (path relative to the project root, file digest) pairs, taken
  in order of sorted file path.

  Args:
    root_path: a directory of llvm checkout.
    projects: a list of LLVMProject instances, which describe checkout paths,
      relative to root_path.

  Returns:
    A dict mapping from project name to project checksum. Projects whose
    directory does not exist under root_path are skipped and have no entry.
  """
  hash_algo = hashlib.sha256

  # File contents are read in binary mode, so the keyword pattern has to be a
  # bytes pattern as well; a str pattern applied to bytes raises TypeError on
  # Python 3. Derive it from the module-level regex so there is one source of
  # truth for the pattern text.
  date_pattern = SVN_DATES_REGEX.pattern
  if not isinstance(date_pattern, bytes):
    date_pattern = date_pattern.encode("ascii")
  svn_dates_regex = re.compile(date_pattern)

  # The replacement used so far was the non-raw literal "$\1$", which is
  # "$" + chr(1) + "$" rather than a backreference to the keyword name. That
  # exact byte sequence is spelled out here so that digests stay identical to
  # those already stored in reference files.
  # NOTE(review): if a backreference was intended, switching to it changes the
  # checksum of every file containing an expanded keyword.
  collapsed_keyword = b"$\x01$"

  def to_bytes(value):
    # hashlib accepts only bytes; on Python 3 paths, symlink targets and hex
    # digests are text and must be encoded first.
    if isinstance(value, bytes):
      return value
    fsencode = getattr(os, "fsencode", None)
    if fsencode is not None:
      return fsencode(value)
    return value.encode("utf-8")

  def collapse_svn_substitutions(contents):
    # Collapse expanded svn Date / LastChangedDate keywords. Their expanded
    # text is locale-specific, so it must not influence the checksum.
    return svn_dates_regex.sub(collapsed_keyword, contents)

  def read_and_collapse_svn_substitutions(file_path):
    with open(file_path, "rb") as f:
      contents = f.read()
    new_contents = collapse_svn_substitutions(contents)
    if contents != new_contents:
      logging.debug("Replaced svn keyword substitutions in %s", file_path)
      logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
    return new_contents

  def compute_file_digest(file_path):
    # A dangling symlink has no contents to read; hash its target instead.
    if os.path.islink(file_path) and not os.path.exists(file_path):
      content = to_bytes(os.readlink(file_path))
    else:
      content = read_and_collapse_svn_substitutions(file_path)
    hasher = hash_algo()
    hasher.update(content)
    return hasher.hexdigest()

  project_checksums = dict()
  # Hash each project.
  for proj in projects:
    project_root = os.path.join(root_path, proj.relpath)
    if not os.path.exists(project_root):
      logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath,
                   proj.name)
      continue

    files = list()

    # Callback for WalkProjectFiles; bound to this iteration's list through a
    # default argument so it can never append to another project's list.
    def add_file_hash(file_path, files=files):
      file_digest = compute_file_digest(file_path)
      logging.debug("Checksum %s for file %s", file_digest, file_path)
      files.append((file_path, file_digest))

    logging.info("Computing checksum for %s", proj.name)
    WalkProjectFiles(root_path, projects, proj, add_file_hash)

    # Compute final checksum. Sorting makes the result independent of the
    # order in which files were visited.
    files.sort(key=lambda x: x[0])
    hasher = hash_algo()
    for file_path, file_digest in files:
      relative_path = os.path.relpath(file_path, project_root)
      hasher.update(to_bytes(relative_path))
      hasher.update(to_bytes(file_digest))
    project_checksums[proj.name] = hasher.hexdigest()
  return project_checksums
    129 
    130 
def WriteLLVMChecksums(checksums, f):
  """Writes checksums to a text file, one project per line.

  Each line has the form "<checksum> <project name>"; lines are emitted in
  sorted order of project name.

  Args:
    checksums: a dict mapping from project name to project checksum (result of
      ComputeLLVMChecksums).
    f: a file object to write into.
  """
  line_format = "{} {}\n"
  for project_name in sorted(checksums):
    digest = checksums[project_name]
    f.write(line_format.format(digest, project_name))
    142 
    143 
def ReadLLVMChecksums(f):
  """Reads checksums from a text file, produced by WriteLLVMChecksums.

  Every non-blank line must consist of exactly two whitespace-separated
  fields: the checksum followed by the project name. Blank and
  whitespace-only lines (for example a trailing empty line) are skipped.

  Args:
    f: a file object to read from; lines are fetched with f.readline() until
      it returns an empty string.

  Returns:
    A dict, mapping from project name to project checksum.

  Raises:
    ValueError: if a non-blank line does not have exactly two fields.
  """
  checksums = {}
  # iter(callable, sentinel) stops at the empty string readline() returns at
  # end of file.
  for line_number, line in enumerate(iter(f.readline, ""), 1):
    fields = line.split()
    if not fields:
      # Blank line: nothing to record.
      continue
    if len(fields) != 2:
      raise ValueError(
          "line %d: expected '<checksum> <project>', got %r" %
          (line_number, line))
    checksum, proj = fields
    checksums[proj] = checksum
  return checksums
    158 
    159 
def ValidateChecksums(reference_checksums,
                      new_checksums,
                      allow_missing_projects=False):
  """Checks new_checksums against reference_checksums.

  Args:
    reference_checksums: a dict of reference checksums, mapping from a project
      name to a project checksum.
    new_checksums: a dict of checksums to be checked, mapping from a project
      name to a project checksum.
    allow_missing_projects:
      When True, projects present only in reference_checksums are ignored, so
        reference_checksums may hold more projects than new_checksums.
      When False, both dicts must cover the same set of projects; a project
        present in reference_checksums but absent from new_checksums makes
        the validation fail.

  Returns:
    True, if every project in new_checksums has an identical checksum in
    reference_checksums and the allow_missing_projects requirement is met.
    False, otherwise.
  """
  # In strict mode a differing project count already rules out a match.
  if not allow_missing_projects and (
      len(new_checksums) != len(reference_checksums)):
    return False

  # A project fails when the reference has no entry for it or when the
  # recorded checksum differs from the newly computed one.
  has_mismatch = any(
      name not in reference_checksums or reference_checksums[name] != digest
      for name, digest in new_checksums.items())
  return not has_mismatch
    195 
    196 
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
  main()
    199