Home | History | Annotate | Line # | Download | only in gcc-changelog
git_commit.py revision 1.1
      1  1.1  mrg #!/usr/bin/env python3
      2  1.1  mrg #
      3  1.1  mrg # This file is part of GCC.
      4  1.1  mrg #
      5  1.1  mrg # GCC is free software; you can redistribute it and/or modify it under
      6  1.1  mrg # the terms of the GNU General Public License as published by the Free
      7  1.1  mrg # Software Foundation; either version 3, or (at your option) any later
      8  1.1  mrg # version.
      9  1.1  mrg #
     10  1.1  mrg # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     11  1.1  mrg # WARRANTY; without even the implied warranty of MERCHANTABILITY or
     12  1.1  mrg # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     13  1.1  mrg # for more details.
     14  1.1  mrg #
     15  1.1  mrg # You should have received a copy of the GNU General Public License
     16  1.1  mrg # along with GCC; see the file COPYING3.  If not see
     17  1.1  mrg # <http://www.gnu.org/licenses/>.  */
     18  1.1  mrg 
     19  1.1  mrg import difflib
     20  1.1  mrg import os
     21  1.1  mrg import re
     22  1.1  mrg import sys
     23  1.1  mrg 
# Directory names that are recognized as ChangeLog locations.
# GitCommit.init_changelog_locations() copies this set into a per-commit list
# (dropping 'gcc/brig' and 'libhsail-rt' for GCC 12 and newer), and
# GitCommit.find_changelog_location() matches commit message lines against it.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}
     80  1.1  mrg 
# Component names accepted in "PR component/NNNN" references, both in the
# subject line and in "\tPR component/NNNN" ChangeLog lines.  A reference
# with a component that is not listed here is reported as an error.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}
    131  1.1  mrg 
# Path prefixes treated as "ignored" locations.
# NOTE(review): consumed by GitCommit.in_ignored_location(), which is not
# visible in this part of the file -- presumably a startswith() test; confirm.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes under which a ChangeLog entry may use a trailing '*'
# wildcard; consulted by GitCommit.check_file_patterns().
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that, like ChangeLog files themselves, must be updated in a commit
# of their own rather than together with normal changes (see
# GitCommit.__init__).
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
    155  1.1  mrg 
# Regular expressions used to recognize the parts of a commit message.

# Author line: "YYYY-MM-DD", two spaces, then "Name  <email>".
author_line_regex = re.compile(
    r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# Additional author: a tab, optional spaces (captured so the caller can
# check the indentation), then "Name  <email>".
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# ChangeLog header such as "gcc/ChangeLog:" or "For gcc/cp/ChangeLog".
# Note that "+-/" inside the character class is a range; it covers the
# characters "+", ",", "-", "." and "/".
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/NNNN" anywhere in the subject line.
subject_pr_regex = re.compile(
    r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# "[PRnnnn]" or "(PRnnnn)" in the subject line.
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# ChangeLog PR line: a tab, "PR ", an optional "component/" and the number.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# ChangeLog DR line: a tab, "DR " and the number.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# ChangeLog item line: a tab, an asterisk, the spaces after it, the content.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that terminates the file list of a ChangeLog item.
end_of_location_regex = re.compile(r'[\[<(:]')
# Item line ending in "(name):" with no description after the colon.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# Line that starts a new item: "\t*" or "\t(name):".
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# Trailer written by "git revert".  The final period is escaped: an
# unescaped "." matches any character, which let a line without the period
# match and dropped the last character of the hash.
revert_regex = re.compile(r'This reverts commit (?P<hash>\w+)\.$')
# Trailer written by "git cherry-pick -x".
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')
    170  1.1  mrg 
# Maximum length of a ChangeLog line, measured after every tab has been
# expanded to TAB_WIDTH spaces.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Trailer prefixes; they are compared against the lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# Lines starting with one of these trailers are skipped by the parser.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
# NOTE(review): looks like a strftime()-style date format; it is not used in
# the visible part of the file -- confirm against its users.
DATE_FORMAT = '%Y-%m-%d'
    179  1.1  mrg 
    180  1.1  mrg 
def decode_path(path):
    """Return PATH with git's C-style quoting undone.

    When core.quotepath is true (default value), utf8 chars are encoded like:
    "b/ko\\304\\215ka.txt"

    The upstream bug is fixed:
    https://github.com/gitpython-developers/GitPython/issues/1099

    but we still need a workaround for older versions of the library.
    Please take a look at the explanation of the transformation:
    https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    A PATH that is not wrapped in double quotes is returned unchanged.
    """
    if len(path) >= 2 and path.startswith('"') and path.endswith('"'):
        # Remove exactly one quote from each side.  str.strip('"') would
        # also eat an escaped quote at the end of the name (as in "a\"")
        # and leave a dangling backslash that cannot be decoded.
        quoted = path[1:-1]
        return (quoted.encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    return path
    197  1.1  mrg 
    198  1.1  mrg 
class Error:
    """One problem found while verifying a commit.

    message -- description of the problem
    line    -- optional offending text; shown in double quotes by repr()
    details -- optional extra information; only stored by this class
    """

    def __init__(self, message, line=None, details=None):
        self.message, self.line, self.details = message, line, details

    def __repr__(self):
        # Without an offending line the message stands alone.
        if not self.line:
            return self.message
        return self.message + ': "%s"' % self.line
    210  1.1  mrg 
    211  1.1  mrg 
class ChangeLogEntry:
    """One ChangeLog entry collected from a commit message.

    folder       -- directory of the ChangeLog the entry belongs to
    author_lines -- list of (name, date) tuples
    initial_prs  -- PR lines the entry was created with
    prs          -- PR lines, including the ones added afterwards
    lines        -- raw lines of the entry
    files, file_patterns -- filled in by parse_file_names()
    parentheses_stack    -- lines with a still unclosed '('
    """

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # Take private copies; the caller keeps modifying its own lists.
        self.author_lines = [*authors]
        self.initial_prs = [*prs]
        self.prs = [*prs]
        self.lines = []
        self.files = []
        self.file_patterns = []
        self.parentheses_stack = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # True while we are between a star prefix and the end of the file
        # list (a colon, an open paren, '<' or '[').
        collecting = False

        for raw in self.lines:
            text = raw
            # Only "\t* " (exactly one space) starts a file list.  That
            # keeps continuation lines listing macro names, for example
            #
            # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
            # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
            # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
            #
            # from being taken for new items.
            star = star_prefix_regex.match(raw)
            if star is not None and len(star.group('spaces')) == 1:
                collecting = True
                text = star.group('content')

            if not collecting:
                continue

            # Drop everything from the first "(NAME)", "<PATTERN>",
            # "[COND]" or colon onwards; that also ends the file list.
            stop = end_of_location_regex.search(text)
            if stop is not None:
                text = text[:stop.start()]
                collecting = False

            # What remains is a comma separated list of file names.
            for chunk in text.split(','):
                candidate = chunk.strip()
                if not candidate:
                    continue
                if candidate.endswith('*'):
                    self.file_patterns.append(candidate[:-1])
                else:
                    self.files.append(candidate)

    @property
    def datetime(self):
        """Date of the first author line that carries one, else None."""
        return next((entry[1] for entry in self.author_lines if entry[1]),
                    None)

    @property
    def authors(self):
        """Names of all authors, in the order of the author lines."""
        names = []
        for entry in self.author_lines:
            names.append(entry[0])
        return names

    @property
    def is_empty(self):
        """True when there are no lines and no PRs beyond the initial ones."""
        if self.lines:
            return False
        return self.prs == self.initial_prs

    def contains_author(self, author):
        """Tell whether AUTHOR is the name of one of the author lines."""
        return any(entry[0] == author for entry in self.author_lines)
    283  1.1  mrg 
    284  1.1  mrg 
class GitInfo:
    """Plain record describing one git commit.

    hexsha         -- commit hash
    date           -- commit date
    author         -- commit author
    lines          -- lines of the commit message
    modified_files -- sequence of (path, status) tuples
    """

    def __init__(self, hexsha, date, author, lines, modified_files):
        self.hexsha, self.date, self.author = hexsha, date, author
        self.lines, self.modified_files = lines, modified_files
    292  1.1  mrg 
    293  1.1  mrg 
    294  1.1  mrg class GitCommit:
    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
        """Parse and verify the commit described by INFO.

        info -- object with 'lines' (the commit message lines) and
            'modified_files' ((path, status) tuples) attributes
        commit_to_info_hook -- callable mapping a commit hash to such an
            info object; called for "This reverts commit ..." messages
        ref_name -- optional ref name, passed to init_changelog_locations()

        All problems that are found are collected in self.errors.
        """
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.subject_prs = set()
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook
        self.init_changelog_locations(ref_name)

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
            self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.match(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # From here on the reverted commit is the one being verified.
            # NOTE(review): commit_to_info_hook defaults to None, in which
            # case this call fails for a revert commit.
            self.info = self.commit_to_info_hook(self.revert_commit)

        # The following happens for get_email.py:
        if not self.info:
            return

        self.check_commit_email()

        # Extract PR numbers from the subject line
        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
        if self.info.lines and not self.revert_commit:
            self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
            for m in subject_pr_regex.finditer(info.lines[0]):
                if not m.group('component') in bug_components:
                    self.errors.append(Error('invalid PR component in subject', info.lines[0]))
                self.subject_prs.add(m.group('pr'))

        # Allow complete deletion of ChangeLog files in a commit
        project_files = [f for f in self.info.modified_files
                         if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files:
            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
                  'should be done separately from normal commits\n' \
                  '(note: ChangeLog entries will be automatically ' \
                  'added by a cron job)'
            self.errors.append(Error(err))
            return

        # project_files is empty at this point, so this is true exactly when
        # every modified file lives in an ignored location.
        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.check_for_broken_parentheses()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            # The remaining checks only run on an otherwise clean commit.
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()
        # parse_changelog() removes every PR it finds in the ChangeLog from
        # subject_prs; whatever is left was only mentioned in the subject.
        if self.subject_prs:
            self.errors.append(Error('PR %s in subject but not in changelog' %
                                     ', '.join(self.subject_prs), self.info.lines[0]))
    375  1.1  mrg 
    376  1.1  mrg     @property
    377  1.1  mrg     def success(self):
    378  1.1  mrg         return not self.errors
    379  1.1  mrg 
    380  1.1  mrg     @property
    381  1.1  mrg     def new_files(self):
    382  1.1  mrg         return [x[0] for x in self.info.modified_files if x[1] == 'A']
    383  1.1  mrg 
    384  1.1  mrg     @classmethod
    385  1.1  mrg     def is_changelog_filename(cls, path, allow_suffix=False):
    386  1.1  mrg         basename = os.path.basename(path)
    387  1.1  mrg         if basename == 'ChangeLog':
    388  1.1  mrg             return True
    389  1.1  mrg         elif allow_suffix and basename.startswith('ChangeLog'):
    390  1.1  mrg             return True
    391  1.1  mrg         else:
    392  1.1  mrg             return False
    393  1.1  mrg 
    394  1.1  mrg     def find_changelog_location(self, name):
    395  1.1  mrg         if name.startswith('\t'):
    396  1.1  mrg             name = name[1:]
    397  1.1  mrg         if name.endswith(':'):
    398  1.1  mrg             name = name[:-1]
    399  1.1  mrg         if name.endswith('/'):
    400  1.1  mrg             name = name[:-1]
    401  1.1  mrg         return name if name in self.changelog_locations else None
    402  1.1  mrg 
    403  1.1  mrg     @classmethod
    404  1.1  mrg     def format_git_author(cls, author):
    405  1.1  mrg         assert '<' in author
    406  1.1  mrg         return author.replace('<', ' <')
    407  1.1  mrg 
    408  1.1  mrg     @classmethod
    409  1.1  mrg     def parse_git_name_status(cls, string):
    410  1.1  mrg         modified_files = []
    411  1.1  mrg         for entry in string.split('\n'):
    412  1.1  mrg             parts = entry.split('\t')
    413  1.1  mrg             t = parts[0]
    414  1.1  mrg             if t == 'A' or t == 'D' or t == 'M':
    415  1.1  mrg                 modified_files.append((parts[1], t))
    416  1.1  mrg             elif t.startswith('R'):
    417  1.1  mrg                 modified_files.append((parts[1], 'D'))
    418  1.1  mrg                 modified_files.append((parts[2], 'A'))
    419  1.1  mrg         return modified_files
    420  1.1  mrg 
    421  1.1  mrg     def init_changelog_locations(self, ref_name):
    422  1.1  mrg         self.changelog_locations = list(default_changelog_locations)
    423  1.1  mrg         if ref_name:
    424  1.1  mrg             version = sys.maxsize
    425  1.1  mrg             if 'releases/gcc-' in ref_name:
    426  1.1  mrg                 version = int(ref_name.split('-')[-1])
    427  1.1  mrg             if version >= 12:
    428  1.1  mrg                 # HSA and BRIG were removed in GCC 12
    429  1.1  mrg                 self.changelog_locations.remove('gcc/brig')
    430  1.1  mrg                 self.changelog_locations.remove('libhsail-rt')
    431  1.1  mrg 
    432  1.1  mrg     def parse_lines(self, all_are_ignored):
    433  1.1  mrg         body = self.info.lines
    434  1.1  mrg 
    435  1.1  mrg         for i, b in enumerate(body):
    436  1.1  mrg             if not b:
    437  1.1  mrg                 continue
    438  1.1  mrg             if (changelog_regex.match(b) or self.find_changelog_location(b)
    439  1.1  mrg                     or star_prefix_regex.match(b) or pr_regex.match(b)
    440  1.1  mrg                     or dr_regex.match(b) or author_line_regex.match(b)
    441  1.1  mrg                     or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
    442  1.1  mrg                 self.changes = body[i:]
    443  1.1  mrg                 return
    444  1.1  mrg         if not all_are_ignored:
    445  1.1  mrg             self.errors.append(Error('cannot find a ChangeLog location in '
    446  1.1  mrg                                      'message'))
    447  1.1  mrg 
    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Every line is classified as a ChangeLog header, an author line, a
        PR or DR line, a trailer (co-authored-by, review trailers, cherry
        pick note) or a line of the current entry.  Entries are appended to
        self.changelog_entries and problems to self.errors.  Authors and
        PRs seen before the first entry go to self.top_level_authors and
        self.top_level_prs, and PRs found here are removed from
        self.subject_prs.
        """
        # Entry that currently receives lines; None when no entry is open.
        last_entry = None
        # Set once an entry without an explicit ChangeLog header was opened;
        # from then on an empty line closes the current entry.
        will_deduce = False
        for line in self.changes:
            if not line:
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                # Explicit header such as "gcc/ChangeLog:".
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                # A bare location name starts an entry as well.
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    # The captured component includes the trailing slash.
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # The PR of the subject is mentioned in the ChangeLog.
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailers are matched case-insensitively.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    # Author inside an entry: add it unless already listed.
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # Inside an open parenthesis a '*' may start a
                        # continuation line, so the spacing is not checked.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                # Look for empty groups before the first
                                # colon.
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
    577  1.1  mrg 
    578  1.1  mrg     def process_parentheses(self, last_entry, line):
    579  1.1  mrg         for c in line:
    580  1.1  mrg             if c == '(':
    581  1.1  mrg                 last_entry.parentheses_stack.append(line)
    582  1.1  mrg             elif c == ')':
    583  1.1  mrg                 if not last_entry.parentheses_stack:
    584  1.1  mrg                     msg = 'bad wrapping of parenthesis'
    585  1.1  mrg                     self.errors.append(Error(msg, line))
    586  1.1  mrg                 else:
    587  1.1  mrg                     del last_entry.parentheses_stack[-1]
    588  1.1  mrg 
    589  1.1  mrg     def parse_file_names(self):
    590  1.1  mrg         for entry in self.changelog_entries:
    591  1.1  mrg             entry.parse_file_names()
    592  1.1  mrg 
    593  1.1  mrg     def check_file_patterns(self):
    594  1.1  mrg         for entry in self.changelog_entries:
    595  1.1  mrg             for pattern in entry.file_patterns:
    596  1.1  mrg                 name = os.path.join(entry.folder, pattern)
    597  1.1  mrg                 if not [name.startswith(pr) for pr in wildcard_prefixes]:
    598  1.1  mrg                     msg = 'unsupported wildcard prefix'
    599  1.1  mrg                     self.errors.append(Error(msg, name))
    600  1.1  mrg 
    601  1.1  mrg     def check_for_empty_description(self):
    602  1.1  mrg         for entry in self.changelog_entries:
    603  1.1  mrg             for i, line in enumerate(entry.lines):
    604  1.1  mrg                 if (item_empty_regex.match(line) and
    605  1.1  mrg                     (i == len(entry.lines) - 1
    606  1.1  mrg                      or not entry.lines[i+1].strip()
    607  1.1  mrg                      or item_parenthesis_regex.match(entry.lines[i+1]))):
    608  1.1  mrg                     msg = 'missing description of a change'
    609  1.1  mrg                     self.errors.append(Error(msg, line))
    610  1.1  mrg 
    611  1.1  mrg     def check_for_broken_parentheses(self):
    612  1.1  mrg         for entry in self.changelog_entries:
    613  1.1  mrg             if entry.parentheses_stack:
    614  1.1  mrg                 msg = 'bad parentheses wrapping'
    615  1.1  mrg                 self.errors.append(Error(msg, entry.parentheses_stack[-1]))
    616  1.1  mrg 
    617  1.1  mrg     def get_file_changelog_location(self, changelog_file):
    618  1.1  mrg         for file in self.info.modified_files:
    619  1.1  mrg             if file[0] == changelog_file:
    620  1.1  mrg                 # root ChangeLog file
    621  1.1  mrg                 return ''
    622  1.1  mrg             index = file[0].find('/' + changelog_file)
    623  1.1  mrg             if index != -1:
    624  1.1  mrg                 return file[0][:index]
    625  1.1  mrg         return None
    626  1.1  mrg 
    627  1.1  mrg     def deduce_changelog_locations(self):
    628  1.1  mrg         for entry in self.changelog_entries:
    629  1.1  mrg             if not entry.folder:
    630  1.1  mrg                 changelog = None
    631  1.1  mrg                 for file in entry.files:
    632  1.1  mrg                     location = self.get_file_changelog_location(file)
    633  1.1  mrg                     if (location == ''
    634  1.1  mrg                        or (location and location in self.changelog_locations)):
    635  1.1  mrg                         if changelog and changelog != location:
    636  1.1  mrg                             msg = 'could not deduce ChangeLog file, ' \
    637  1.1  mrg                                   'not unique location'
    638  1.1  mrg                             self.errors.append(Error(msg))
    639  1.1  mrg                             return
    640  1.1  mrg                         changelog = location
    641  1.1  mrg                 if changelog is not None:
    642  1.1  mrg                     entry.folder = changelog
    643  1.1  mrg                 else:
    644  1.1  mrg                     msg = 'could not deduce ChangeLog file'
    645  1.1  mrg                     self.errors.append(Error(msg))
    646  1.1  mrg 
    647  1.1  mrg     @classmethod
    648  1.1  mrg     def in_ignored_location(cls, path):
    649  1.1  mrg         for ignored in ignored_prefixes:
    650  1.1  mrg             if path.startswith(ignored):
    651  1.1  mrg                 return True
    652  1.1  mrg         return False
    653  1.1  mrg 
    654  1.1  mrg     def get_changelog_by_path(self, path):
    655  1.1  mrg         components = path.split('/')
    656  1.1  mrg         while components:
    657  1.1  mrg             if '/'.join(components) in self.changelog_locations:
    658  1.1  mrg                 break
    659  1.1  mrg             components = components[:-1]
    660  1.1  mrg         return '/'.join(components)
    661  1.1  mrg 
    def check_mentioned_files(self):
        """Cross-check files named in ChangeLog entries with changed files.

        Errors are appended to ``self.errors`` for:
        - an entry that mentions neither files nor file patterns,
        - a file mentioned more than once,
        - a mentioned file that is not among the modified files (with a
          close-match suggestion when difflib finds one),
        - a modified file that is neither mentioned, ignored, covered by a
          mentioned pattern, nor auto-added as a new file,
        - a mentioned pattern that covered no modified file.

        Side effect: for modified files listed in ``self.new_files`` a
        '\\t* <file>: New file.' line is added to the entry of the matching
        ChangeLog folder; the entry is created and appended to
        ``self.changelog_entries`` when it does not exist yet.
        """
        folder_count = len([x.folder for x in self.changelog_entries])
        assert folder_count == len(self.changelog_entries)

        # Full paths (folder + file) of all files named in the entries.
        mentioned_files = set()
        # Full paths (folder + pattern) of all wildcard patterns.
        mentioned_patterns = []
        # Patterns that matched at least one changed file.
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                # ChangeLog files themselves are not tracked here.
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        # Paths of all modified files except ChangeLog files.
        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        # Mentioned but not changed.
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            candidates = difflib.get_close_matches(file, changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))
        # Changed but not mentioned.
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    # First existing entry for that folder, if any.
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        if not prs:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            for entry in self.changelog_entries:
                                if entry.prs != prs:
                                    prs = []
                                    break
                        # No entry for this folder yet: create one.
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    # A changed file is covered when its path starts with a
                    # mentioned pattern; only the first match is recorded.
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        # Every mentioned pattern must have covered some changed file.
        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))
    744  1.1  mrg 
    745  1.1  mrg     def check_for_correct_changelog(self):
    746  1.1  mrg         for entry in self.changelog_entries:
    747  1.1  mrg             for file in entry.files:
    748  1.1  mrg                 full_path = os.path.join(entry.folder, file)
    749  1.1  mrg                 changelog_location = self.get_changelog_by_path(full_path)
    750  1.1  mrg                 if changelog_location != entry.folder:
    751  1.1  mrg                     msg = 'wrong ChangeLog location "%s", should be "%s"'
    752  1.1  mrg                     err = Error(msg % (entry.folder, changelog_location), file)
    753  1.1  mrg                     self.errors.append(err)
    754  1.1  mrg 
    755  1.1  mrg     @classmethod
    756  1.1  mrg     def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
    757  1.1  mrg         output = ''
    758  1.1  mrg         for i, author in enumerate(authors):
    759  1.1  mrg             if i == 0:
    760  1.1  mrg                 output += '%s%s  %s\n' % (prefix, timestamp, author)
    761  1.1  mrg             else:
    762  1.1  mrg                 output += '%s\t    %s\n' % (prefix, author)
    763  1.1  mrg         output += '\n'
    764  1.1  mrg         return output
    765  1.1  mrg 
    766  1.1  mrg     def to_changelog_entries(self, use_commit_ts=False):
    767  1.1  mrg         current_timestamp = self.info.date.strftime(DATE_FORMAT)
    768  1.1  mrg         for entry in self.changelog_entries:
    769  1.1  mrg             output = ''
    770  1.1  mrg             timestamp = entry.datetime
    771  1.1  mrg             if self.revert_commit:
    772  1.1  mrg                 timestamp = current_timestamp
    773  1.1  mrg                 orig_date = self.original_info.date
    774  1.1  mrg                 current_timestamp = orig_date.strftime(DATE_FORMAT)
    775  1.1  mrg             elif self.cherry_pick_commit:
    776  1.1  mrg                 info = self.commit_to_info_hook(self.cherry_pick_commit)
    777  1.1  mrg                 # it can happen that it is a cherry-pick for a different
    778  1.1  mrg                 # repository
    779  1.1  mrg                 if info:
    780  1.1  mrg                     timestamp = info.date.strftime(DATE_FORMAT)
    781  1.1  mrg                 else:
    782  1.1  mrg                     timestamp = current_timestamp
    783  1.1  mrg             elif not timestamp or use_commit_ts:
    784  1.1  mrg                 timestamp = current_timestamp
    785  1.1  mrg             authors = entry.authors if entry.authors else [self.info.author]
    786  1.1  mrg             # add Co-Authored-By authors to all ChangeLog entries
    787  1.1  mrg             for author in self.co_authors:
    788  1.1  mrg                 if author not in authors:
    789  1.1  mrg                     authors.append(author)
    790  1.1  mrg 
    791  1.1  mrg             if self.cherry_pick_commit or self.revert_commit:
    792  1.1  mrg                 original_author = self.original_info.author
    793  1.1  mrg                 output += self.format_authors_in_changelog([original_author],
    794  1.1  mrg                                                            current_timestamp)
    795  1.1  mrg                 if self.revert_commit:
    796  1.1  mrg                     output += '\tRevert:\n'
    797  1.1  mrg                 else:
    798  1.1  mrg                     output += '\tBackported from master:\n'
    799  1.1  mrg                 output += self.format_authors_in_changelog(authors,
    800  1.1  mrg                                                            timestamp, '\t')
    801  1.1  mrg             else:
    802  1.1  mrg                 output += self.format_authors_in_changelog(authors, timestamp)
    803  1.1  mrg             for pr in entry.prs:
    804  1.1  mrg                 output += '\t%s\n' % pr
    805  1.1  mrg             for line in entry.lines:
    806  1.1  mrg                 output += line + '\n'
    807  1.1  mrg             yield (entry.folder, output.rstrip())
    808  1.1  mrg 
    809  1.1  mrg     def print_output(self):
    810  1.1  mrg         for entry, output in self.to_changelog_entries():
    811  1.1  mrg             print('------ %s/ChangeLog ------ ' % entry)
    812  1.1  mrg             print(output)
    813  1.1  mrg 
    814  1.1  mrg     def print_errors(self):
    815  1.1  mrg         print('Errors:')
    816  1.1  mrg         for error in self.errors:
    817  1.1  mrg             print(error)
    818  1.1  mrg 
    819  1.1  mrg     def check_commit_email(self):
    820  1.1  mrg         # Parse 'Martin Liska  <mliska (at] suse.cz>'
    821  1.1  mrg         email = self.info.author.split(' ')[-1].strip('<>')
    822  1.1  mrg 
    823  1.1  mrg         # Verify that all characters are ASCII
    824  1.1  mrg         # TODO: Python 3.7 provides a nicer function: isascii
    825  1.1  mrg         if len(email) != len(email.encode()):
    826  1.1  mrg             self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))
    827