#!/usr/bin/env python3
2
3 # Copyright (C) 2020 Free Software Foundation, Inc.
4 #
5 # This file is part of GCC.
6 #
7 # GCC is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3, or (at your option)
10 # any later version.
11 #
12 # GCC is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with GCC; see the file COPYING. If not, write to
19 # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 # Boston, MA 02110-1301, USA.
21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
26 #
27 # Author: Martin Liska <mliska (at] suse.cz>
28
29 import argparse
30 import datetime
31 import os
32 import re
33 import subprocess
34 import sys
35 from itertools import takewhile
36
37 import requests
38
39 from unidiff import PatchSet
40
# A 'PR component/number' reference that follows a comment leader
# ('//', '/*', 'C', 'c', '*' or '!').
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR component/number' string into component and number.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# A 'DR number' reference that follows the same comment leaders.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# A dg-error / dg-warning directive.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# A PR number (at least four digits) embedded in a file name.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')

# Patterns used to guess the name of the changed function/struct/macro.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
comment_regex = re.compile(r'^\/\*')
struct_regex = re.compile(
    r'^(class|struct|union|enum)\s+'
    r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)'
)
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
template_and_param_regex = re.compile(r'<[^<>]*>')
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')

# Bugzilla REST query; '%s' is replaced by the bug id.
bugzilla_url = ('https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&'
                'include_fields=summary,component')

# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

help_message = (
    'Generate ChangeLog template for PATCH.\n'
    "PATCH must be generated using diff(1)'s -up or -cp options\n"
    '(or their equivalent in git).\n'
)

# NOTE: despite its name, script_folder holds the resolved path of this
# script itself; root is therefore the parent of the script's directory.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR collected by generate_changelog (used for the commit subject).
firstpr = ''
73
74
def find_changelog(path, root_dir=None):
    """Return the closest ancestor folder of PATH that holds a ChangeLog.

    Starting with the directory part of PATH, each folder is checked for
    a 'ChangeLog' file below ROOT_DIR and the search moves one level up
    until one is found.

    path: path of a changed file, as it appears in the patch.
    root_dir: directory the folders are resolved against; defaults to
        the module-level 'root'.

    Returns the folder (in the same form as it appears in PATH), or ''
    when no ancestor folder contains a ChangeLog.
    """
    if root_dir is None:
        root_dir = root
    folder = os.path.split(path)[0]
    while True:
        if os.path.exists(os.path.join(root_dir, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # dirname() reaches '' for relative paths but stays at the
        # filesystem root for absolute ones; stop in both cases so the
        # walk always terminates.
        if parent == '' or parent == folder:
            return ''
        folder = parent
84
85
def extract_function_name(line):
    """Guess the name of the entity declared on LINE.

    The checks run in this order: a line starting a C comment yields
    None; a class/struct/union/enum declaration yields '<keyword> <name>';
    a #define/#undef yields the macro name; a DEF* supermacro yields its
    first argument; otherwise the identifier in front of an opening
    parenthesis is returned with template arguments removed.

    Returns None when nothing matches.
    """
    if comment_regex.match(line):
        return None

    struct = struct_regex.search(line)
    if struct:
        # Struct declaration
        return '%s %s' % (struct.group(1), struct.group(3))

    # Macro definition first, then supermacro; each names its own group.
    for regex, group in ((macro_regex, 2), (super_macro_regex, 1)):
        found = regex.search(line)
        if found:
            return found.group(group)

    call = fn_regex.search(line)
    if not call:
        return None
    # Discard template and function parameters.
    return template_and_param_regex.sub('', call.group(1)).rstrip()
108
109
def try_add_function(functions, line):
    """Record the function name found on LINE in FUNCTIONS.

    The name is appended only if it is not already listed.  Returns True
    when a name could be extracted from LINE (even if it was already
    present), False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
115
116
def sort_changelog_files(changed_file):
    """Sort key for changed files: (is_added_file, is_removed_file).

    With this key, files that are neither added nor removed sort first,
    removed files next and added files last.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
119
120
def get_pr_titles(prs):
    """Download the Bugzilla summary of every PR listed in PRS.

    prs: list of PR references such as 'PR c++/12345', 'PR 12345' or
        '12345'.  The list is updated in place: an entry for which
        Bugzilla returns exactly one bug is rewritten to
        'PR <component>/<number>'.

    Entries whose id part is not a plain number (e.g. 'DR 123') are left
    untouched and are not looked up.

    Returns one '<PR> - <summary>' line per distinct PR, each followed by
    an empty line, or '' when nothing was found.

    Raises whatever requests raises on connection failure or timeout,
    and ValueError if the reply is not valid JSON.
    """
    # Without a timeout requests may wait on an unresponsive server
    # forever.
    timeout_seconds = 30
    output = []
    for idx, pr in enumerate(prs):
        # Keep only the numeric id: 'PR c++/12345' -> '12345',
        # 'PR 12345' (taken from a file name) -> '12345'.
        m = re.fullmatch(r'(?:PR\s*)?([0-9]+)', pr.split('/')[-1].strip())
        if not m:
            continue
        pr_id = m.group(1)
        r = requests.get(bugzilla_url % pr_id, timeout=timeout_seconds)
        # An error reply carries no 'bugs' list; treat it as "not found".
        bugs = r.json().get('bugs', [])
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)
135
136
def _collect_test_prs(file, prs):
    """Append PR/DR references of the newly added test FILE to PRS.

    References are taken from the first ten lines of the first hunk and
    from a 'prNNNN' number in the file name.
    """
    this_file_prs = []
    hunks = list(file)
    # A new file without any hunk (e.g. an empty file) has no lines to
    # scan; indexing hunks[0] unconditionally would raise IndexError.
    if hunks:
        # Only search first ten lines as later lines may contain
        # commented-out code with a note that it has not been tested
        # due to a certain PR or DR.
        for line in hunks[0][0:10]:
            m = pr_regex.search(line.value)
            if m:
                pr = m.group('pr')
                if pr not in prs:
                    prs.append(pr)
                    this_file_prs.append(pr.split('/')[-1])
            else:
                m = dr_regex.search(line.value)
                if m:
                    dr = m.group('dr')
                    if dr not in prs:
                        prs.append(dr)
                        this_file_prs.append(dr.split('/')[-1])
                elif dg_regex.search(line.value):
                    # Found dg-warning/dg-error line
                    break

    # PR number in the file name
    fname = os.path.basename(file.path)
    m = pr_filename_regex.search(fname)
    if m:
        pr = m.group('pr')
        pr2 = 'PR ' + pr
        if pr not in this_file_prs and pr2 not in prs:
            prs.append(pr2)


def _collect_hunk_functions(functions, hunk, extension):
    """Append the names of the functions touched by HUNK to FUNCTIONS.

    EXTENSION is the extension of the changed file; '.md' files get
    special handling of their (define... "name") forms.  When no name is
    found in the hunk body, the hunk's section header is tried instead.
    """
    last_fn = None
    modified_visited = False
    success = False
    for line in hunk:
        m = identifier_regex.match(line.value)
        if line.is_added or line.is_removed:
            # special-case definition in .md files
            m2 = md_def_regex.match(line.value)
            if extension == '.md' and m2:
                fn = m2.group(1)
                if fn not in functions:
                    functions.append(fn)
                    last_fn = None
                    success = True

            if not line.value.strip():
                continue
            modified_visited = True
            if m and try_add_function(functions, m.group(1)):
                last_fn = None
                success = True
        elif line.is_context:
            if last_fn and modified_visited:
                # A change was seen after the remembered context line.
                try_add_function(functions, last_fn)
                last_fn = None
                modified_visited = False
                success = True
            elif m:
                # Remember the latest identifier-like context line.
                last_fn = m.group(1)
                modified_visited = False
    if not success:
        try_add_function(functions, hunk.section_header)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog template for the patch in DATA.

    data: the patch, handed as-is to unidiff's PatchSet (the script
        passes an open file or standard input).
    no_functions: when true, no function names are listed.
    fill_pr_titles: when true, PR titles are fetched with get_pr_titles
        and put in front of the PR list.
    additional_prs: optional list of PR references listed before the
        ones found in the patch.

    Side effect: the module-level 'firstpr' is set to the first collected
    PR, if there is any.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = list(additional_prs) if additional_prs else []
    out = ''
    diff = PatchSet(data)

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            _collect_test_prs(file, prs)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out += '\t* %s: %s.\n' % (relative_path, msg)
            elif file.is_removed_file:
                out += '\t* %s: Removed.\n' % (relative_path)
            elif hasattr(file, 'is_rename') and file.is_rename:
                out += '\t* %s: Moved to...\n' % (relative_path)
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out += '\t* %s: ...here.\n' % (new_path)
            elif os.path.basename(file.path) in generated_files:
                out += '\t* %s: Regenerate.\n' % (relative_path)
            else:
                functions = []
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files
                if (not no_functions and not in_tests
                        and extension in function_extensions):
                    for hunk in file:
                        _collect_hunk_functions(functions, hunk, extension)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
278
279
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to the ChangeLogs.

    For every file of the patch in DATA (handed to unidiff's PatchSet)
    the responsible ChangeLog is located with find_changelog and, once
    per ChangeLog, a new entry is written in front of its content.  The
    entry is dated today and signed with git's user.name and user.email.

    Raises subprocess.CalledProcessError if git cannot provide the user
    identity and OSError if a ChangeLog cannot be read or written.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    # Fixed argument lists: no shell is needed to run git.
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    # ChangeLog header fields are separated by two spaces.
    header = f'{current_timestamp}  {username}  <{email}>\n\n'

    changelogs = set()
    for file in PatchSet(data):
        # find_changelog looks for the ChangeLog below 'root', so open
        # it there too instead of relative to the current directory.
        changelog = os.path.join(root, find_changelog(file.path),
                                 'ChangeLog')
        if changelog in changelogs:
            continue
        changelogs.add(changelog)
        with open(changelog) as f:
            content = f.read()
        with open(changelog, 'w') as f:
            f.write(header)
            f.write('\tUpdate copyright years.\n\n')
            f.write(content)
300
301
# Command-line entry point: parse the options, read the patch and either
# update the copyright entries or emit the ChangeLog template.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    output = None
    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file we opened; leave standard input alone.
        if data is not sys.stdin:
            data.close()

    if not args.update_copyright:
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # Everything before the first '#' line is the message text;
            # the rest is kept after the generated entry.
            start = list(takewhile(lambda line: not line.startswith('#'),
                                   lines))
            end = lines[len(start):]
            if not start or not start[0]:
                # initial commit subject line 'component: [PRnnnnn]'
                m = prnum_regex.match(firstpr)
                if m:
                    title = f'{m.group("comp")}: [PR{m.group("num")}]'
                    start.insert(0, title)
            if start:
                # append empty line
                if start[-1] != '':
                    start.append('')
            else:
                # append 2 empty lines
                start = 2 * ['']
            # The new content is complete before the file is reopened
            # for writing (which truncates it).
            with open(args.changelog, 'w') as f:
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')
357