Home | History | Annotate | Line # | Download | only in contrib
mklog.py revision 1.1.1.1
      1 #!/usr/bin/env python3
      2 
      3 # Copyright (C) 2020 Free Software Foundation, Inc.
      4 #
      5 # This file is part of GCC.
      6 #
      7 # GCC is free software; you can redistribute it and/or modify
      8 # it under the terms of the GNU General Public License as published by
      9 # the Free Software Foundation; either version 3, or (at your option)
     10 # any later version.
     11 #
     12 # GCC is distributed in the hope that it will be useful,
     13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 # GNU General Public License for more details.
     16 #
     17 # You should have received a copy of the GNU General Public License
     18 # along with GCC; see the file COPYING.  If not, write to
     19 # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
     20 # Boston, MA 02110-1301, USA.
     21 
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
     26 #
# Author: Martin Liska <mliska@suse.cz>
     28 
     29 import argparse
     30 import datetime
     31 import os
     32 import re
     33 import subprocess
     34 import sys
     35 from itertools import takewhile
     36 
     37 import requests
     38 
     39 from unidiff import PatchSet
     40 
# A comment leader ('//', '/*' or one of the single characters C, c, *, !)
# followed by 'PR component/number'; the named group 'pr' holds the
# 'PR component/number' text.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR component/number' string into the groups 'comp' and 'num'.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same comment leaders as pr_regex, followed by 'DR number' (group 'dr').
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# A '{ dg-error' or '{ dg-warning' directive; generate_changelog stops
# looking for PR/DR references in a new test once it sees one.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr'/'PR' followed by at least four digits, at the start of a file name
# or after a non-word character or underscore; group 'pr' is the number.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line starting with a letter, digit, underscore or '#'; group 1 is the
# whole line.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that starts with '/*'.
comment_regex = re.compile(r'^\/\*')
# class/struct/union/enum declaration with an optional GTY(...) marker;
# group 1 is the keyword and group 3 the declared name.
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' or '#undef NAME'; group 2 is NAME.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# 'DEF...(' at the start of a line; group 1 is the first macro argument.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# A name followed by '(' and a character other than '*'; group 1 is the
# name (it may still contain template arguments).
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# A '<...>' group without nested angle brackets.
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "name"' as matched on changed lines of .md files; group 1 is
# the quoted name.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query returning summary and component; '%s' is the bug id.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

# Extensions of files for which changed function names are extracted.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

# Used as the argparse description of the command-line interface.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name this is the resolved path of this script file, not
# of a folder; 'root' is therefore the parent of the directory that contains
# the script.  'root' can be replaced with the -d/--directory option and is
# the base directory find_changelog uses to look for ChangeLog files.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR collected by the last generate_changelog call that found any;
# the command-line code uses it to build a commit subject line.
firstpr = ''
     73 
     74 
def find_changelog(path):
    """Return the closest enclosing folder of PATH that has a ChangeLog.

    Starting with the directory part of PATH, walk towards the top and
    return the first folder for which ``<root>/<folder>/ChangeLog`` exists.
    An empty string is returned when no enclosing folder qualifies.
    """
    folder = os.path.dirname(path)
    while folder:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        if parent == folder:
            # os.path.dirname('/') is '/' again, so for an absolute PATH
            # the folder never becomes ''; stop instead of looping forever.
            return ''
        folder = parent
    return ''
     84 
     85 
def extract_function_name(line):
    """Return the name of the entity declared on LINE, or None.

    Lines starting a C comment yield None.  Otherwise the first matching
    form wins: a class/struct/union/enum declaration ('<keyword> <name>'),
    a #define/#undef (macro name), a DEF* super-macro (its first argument)
    and finally a function-like 'name (' (name without '<...>' groups).
    """
    if comment_regex.match(line) is not None:
        return None

    # Struct declaration
    found = struct_regex.search(line)
    if found is not None:
        keyword, name = found.group(1, 3)
        return f'{keyword} {name}'

    # Macro definition, then supermacro: (pattern, group holding the name)
    for pattern, group in ((macro_regex, 2), (super_macro_regex, 1)):
        found = pattern.search(line)
        if found is not None:
            return found.group(group)

    found = fn_regex.search(line)
    if found is None:
        return None
    # Discard template and function parameters.
    return template_and_param_regex.sub('', found.group(1)).rstrip()
    108 
    109 
def try_add_function(functions, line):
    """Record the name declared on LINE in FUNCTIONS if it is not there yet.

    Return True when LINE yielded a name (whether or not it was already
    listed) and False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
    115 
    116 
def sort_changelog_files(changed_file):
    """Sort key for the files of one ChangeLog.

    The key is the pair (is added, is removed), so modified files sort
    first, removed files next and added files last.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
    119 
    120 
def get_pr_titles(prs):
    """Download the titles of the bugs listed in PRS from Bugzilla.

    PRS is a list of strings such as 'PR c++/12345', 'PR 12345' or '12345'.
    Every entry for which Bugzilla reports exactly one bug is replaced in
    place by 'PR <component>/<number>' using the reported component.
    'DR ...' entries and entries without a numeric id are left untouched.

    Return the text block '<PR> - <summary>' (one line per distinct bug,
    each followed by an empty line), or '' when nothing was found.
    Network and JSON decoding errors are propagated to the caller.
    """
    output = []
    for idx, pr in enumerate(prs):
        # DR numbers are not Bugzilla bug ids; never look them up.
        if pr.startswith('DR '):
            continue
        # 'PR c++/12345' -> '12345', 'PR 12345' -> '12345'
        fields = pr.split('/')[-1].split()
        if not fields or not fields[-1].isdigit():
            continue
        pr_id = fields[-1]
        # Without a timeout an unresponsive server blocks the script forever.
        r = requests.get(bugzilla_url % pr_id, timeout=10)
        # Error replies carry no 'bugs' member.
        bugs = r.json().get('bugs') or []
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)
    135 
    136 
def _scan_new_test_for_prs(file, prs):
    """Append the PR/DR references of a newly added test FILE to PRS.

    References are taken from comments in the leading lines of the file
    and from a 'prNNNN' component of the file name.
    """
    this_file_prs = []
    hunks = list(file)
    # Only search first ten lines as later lines may contain commented
    # code with a note that it has not been tested due to a certain
    # PR or DR.  A file without hunks (e.g. an empty new file) has
    # nothing to scan.
    for line in (hunks[0][0:10] if hunks else []):
        m = pr_regex.search(line.value)
        if m:
            pr = m.group('pr')
            # Remember the number even when the PR is already known from
            # another file, so the file name check below does not add it
            # a second time as 'PR <number>'.
            this_file_prs.append(pr.split('/')[-1])
            if pr not in prs:
                prs.append(pr)
            continue
        m = dr_regex.search(line.value)
        if m:
            dr = m.group('dr')
            if dr not in prs:
                prs.append(dr)
        elif dg_regex.search(line.value):
            # Found dg-warning/dg-error line
            break

    # PR number in the file name
    m = pr_filename_regex.search(os.path.basename(file.path))
    if m:
        pr = m.group('pr')
        pr2 = 'PR ' + pr
        if pr not in this_file_prs and pr2 not in prs:
            prs.append(pr2)


def _collect_hunk_functions(hunk, extension, functions):
    """Append the names of the functions touched by HUNK to FUNCTIONS.

    EXTENSION is the extension of the changed file; '.md' files get an
    extra check for '(define... "name"' on changed lines.
    """
    # Most recent context line that looks like a declaration; it becomes
    # the function name once changed lines are followed by context again.
    last_fn = None
    # Whether a non-blank changed line was seen since last_fn was set.
    modified_visited = False
    # Whether this hunk was attributed to some name already.
    success = False
    for line in hunk:
        m = identifier_regex.match(line.value)
        if line.is_added or line.is_removed:
            # special-case definition in .md files
            if extension == '.md':
                m2 = md_def_regex.match(line.value)
                if m2:
                    fn = m2.group(1)
                    if fn not in functions:
                        functions.append(fn)
                        last_fn = None
                        success = True

            if not line.value.strip():
                continue
            modified_visited = True
            if m and try_add_function(functions, m.group(1)):
                last_fn = None
                success = True
        elif line.is_context:
            if last_fn and modified_visited:
                try_add_function(functions, last_fn)
                last_fn = None
                modified_visited = False
                success = True
            elif m:
                last_fn = m.group(1)
                modified_visited = False
    if not success:
        # Fall back to the function named in the hunk header.
        try_add_function(functions, hunk.section_header)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch DATA.

    DATA is passed to unidiff's PatchSet.  The result starts with the list
    of PR/DR entries (preceded by their titles when FILL_PR_TITLES is set,
    which queries Bugzilla), followed by one section per affected ChangeLog
    with an entry for every changed file.

    NO_FUNCTIONS suppresses the names of changed functions.
    ADDITIONAL_PRS is an optional list of PRs placed before the PRs found
    in newly added tests; the list itself is not modified.

    As a side effect the global 'firstpr' is set to the first PR whenever
    at least one PR is known.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    out = ''
    diff = PatchSet(data)

    # Start with the PRs requested by the caller, without duplicates.
    prs = list(dict.fromkeys(additional_prs)) if additional_prs else []

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            _scan_new_test_for_prs(file, prs)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            functions = []
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out += '\t* %s: %s.\n' % (relative_path, msg)
            elif file.is_removed_file:
                out += '\t* %s: Removed.\n' % (relative_path)
            elif getattr(file, 'is_rename', False):
                out += '\t* %s: Moved to...\n' % (relative_path)
                # Drop the two-character prefix of the target name.
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out += '\t* %s: ...here.\n' % (new_path)
            elif os.path.basename(file.path) in generated_files:
                out += '\t* %s: Regenerate.\n' % (relative_path)
            else:
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files
                if (not no_functions and not in_tests
                        and extension in function_extensions):
                    for hunk in file:
                        _collect_hunk_functions(hunk, extension, functions)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
    278 
    279 
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to affected ChangeLogs.

    DATA is a patch passed to unidiff's PatchSet.  For every file in the
    patch the closest ChangeLog below 'root' is located and, once per
    ChangeLog, a dated entry with the git user name and e-mail is written
    in front of the existing content.

    Raises subprocess.CalledProcessError when the git configuration cannot
    be read and OSError when a ChangeLog cannot be read or written.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    # Fixed argument lists need no shell.
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()
    header = (f'{current_timestamp}  {username}  <{email}>\n\n'
              '\tUpdate copyright years.\n\n')

    changelogs = set()
    for file in PatchSet(data):
        # Same guard as in generate_changelog: nothing to locate here.
        if file.path == '/dev/null':
            continue
        # find_changelog checks for the ChangeLog below 'root', so open it
        # there as well rather than relative to the current directory.
        changelog = os.path.join(root, find_changelog(file.path),
                                 'ChangeLog')
        if changelog in changelogs:
            continue
        changelogs.add(changelog)
        with open(changelog) as f:
            content = f.read()
        with open(changelog, 'w') as f:
            f.write(header)
            f.write(content)
    300 
    301 
if __name__ == '__main__':
    # Command-line entry point: read a patch from a file or standard input
    # and either update the copyright entries of the affected ChangeLogs,
    # insert the generated ChangeLog skeleton into a git commit message
    # file, or print the skeleton to standard output.
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                             'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    output = None
    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file, but leave standard input alone.
        if data is not sys.stdin:
            data.close()

    if output is not None:
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # Everything before the first '#' line is the message text;
            # the '#' lines and whatever follows them stay at the end.
            start = list(takewhile(lambda line: not line.startswith('#'),
                                   lines))
            end = lines[len(start):]
            with open(args.changelog, 'w') as f:
                if not start or not start[0]:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(firstpr)
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
                if start:
                    # append empty line
                    if start[-1] != '':
                        start.append('')
                else:
                    # append 2 empty lines
                    start = 2 * ['']
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')
    357