#!/usr/bin/env python3

# Copyright (C) 2017-2019 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING. If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and adds a skeleton ChangeLog file to the file. It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
#
# This is a straightforward adaptation of original Perl script.
#
# Author: Yury Gribov <tetra2005 (at] gmail.com>

import argparse
import sys
import re
import os.path
import os
import tempfile
import time
import shutil
from subprocess import Popen, PIPE

# Program name used as the prefix of diagnostics.
me = os.path.basename(sys.argv[0])

# Matches an added line ('+') that starts with a comment marker ('//', '/*',
# or a single 'C', 'c', '*' or '!') followed by "PR <component>/<number>".
# Group 3 is the "PR ..." text.
pr_regex = re.compile(r'\+(\/(\/|\*)|[Cc*!])\s+(PR [a-z+-]+\/[0-9]+)')


def error(msg):
    """Print MSG as an error on stderr and exit with status 1."""
    sys.stderr.write("%s: error: %s\n" % (me, msg))
    sys.exit(1)


def warn(msg):
    """Print MSG as a warning on stderr."""
    sys.stderr.write("%s: warning: %s\n" % (me, msg))


class RegexCache(object):
    """Simple trick to Perl-like combined match-and-bind.

    match() and search() remember their result so that a following
    group() call can read it, which lets a match be used directly as an
    `if` condition.
    """

    def __init__(self):
        self.last_match = None

    def match(self, p, s):
        """Match pattern P (string or compiled regex) at the start of S."""
        self.last_match = re.match(p, s) if isinstance(p, str) else p.match(s)
        return self.last_match

    def search(self, p, s):
        """Search S for pattern P (string or compiled regex)."""
        self.last_match = re.search(p, s) if isinstance(p, str) else p.search(s)
        return self.last_match

    def group(self, n):
        """Return group N of the most recent successful match/search."""
        return self.last_match.group(n)


cache = RegexCache()


def run(cmd, die_on_error):
    """Simple wrapper for Popen.

    CMD is a command line whose arguments are separated by single spaces.
    Returns a (returncode, stdout, stderr) tuple with both streams decoded
    to text.  If DIE_ON_ERROR is true, failing to start the command or a
    non-zero exit status is reported through error(), which exits.
    If the command cannot be started and DIE_ON_ERROR is false, a non-zero
    return code is reported with empty stdout.
    """
    try:
        proc = Popen(cmd.split(' '), stderr=PIPE, stdout=PIPE)
    except OSError as e:
        # E.g. the executable is not installed.
        if die_on_error:
            error("`%s` failed:\n%s" % (cmd, e))
        return 127, '', str(e)

    out, err = proc.communicate()
    out = out.decode()
    err = err.decode(errors='replace')
    if die_on_error and proc.returncode != 0:
        error("`%s` failed:\n%s" % (cmd, err))
    return proc.returncode, out, err


def read_user_info():
    """Return the (name, email) pair to use in ChangeLog entries.

    The ~/.mklog config file is tried first.  The .mklog format is:
        NAME = ...
        EMAIL = ...
    If that file does not exist, `git config user.name` and
    `git config user.email` are used instead.  Missing settings are
    reported through error(), which exits.
    """

    # First try to read .mklog config
    mklog_conf = os.path.expanduser('~/.mklog')
    if os.path.exists(mklog_conf):
        attrs = {}
        with open(mklog_conf) as f:
            for s in f:
                if cache.match(r'^\s*([a-zA-Z0-9_]+)\s*=\s*(.*?)\s*$', s):
                    attrs[cache.group(1)] = cache.group(2)
        if 'NAME' not in attrs:
            error("'NAME' not present in .mklog")
        if 'EMAIL' not in attrs:
            error("'EMAIL' not present in .mklog")
        return attrs['NAME'], attrs['EMAIL']

    # Otherwise go with git

    rc1, name, _ = run('git config user.name', False)
    name = name.rstrip()
    rc2, email, _ = run('git config user.email', False)
    email = email.rstrip()

    if rc1 != 0 or rc2 != 0:
        error("""\
Could not read git user.name and user.email settings.
Please add missing git settings, or create a %s.
""" % mklog_conf)

    return name, email


def get_parent_changelog(s):
    """See which ChangeLog this file change should go to.

    Returns a (changelog_path, relative_name) pair: the nearest
    "<dir>/ChangeLog" found while walking up from S (checked both relative
    to the tree containing this script and to the current directory) and
    the name of S relative to that directory.  Returns "Unknown ChangeLog"
    and S unchanged when nothing is found.
    """

    if s.find('\\') == -1 and s.find('/') == -1:
        return "ChangeLog", s

    gcc_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

    d = s
    while d:
        clname = d + "/ChangeLog"
        if os.path.exists(gcc_root + '/' + clname) or os.path.exists(clname):
            relname = s[len(d)+1:]
            return clname, relname
        parent, _ = os.path.split(d)
        # For an absolute path the split of the root yields the root again;
        # stop there instead of looping forever.
        if parent == d:
            break
        d = parent

    return "Unknown ChangeLog", s


class FileDiff:
    """Class to represent changes in a single file."""

    def __init__(self, filename):
        self.filename = filename
        # Hunk objects, in patch order.
        self.hunks = []
        self.clname, self.relname = get_parent_changelog(filename)

    def dump(self):
        """Print a debugging description of this diff and its hunks."""
        print("Diff for %s:\n ChangeLog = %s\n rel name = %s\n"
              % (self.filename, self.clname, self.relname))
        for i, h in enumerate(self.hunks):
            print("Next hunk %d:" % i)
            h.dump()


class Hunk:
    """Class to represent a single hunk of changes."""

    def __init__(self, hdr):
        self.hdr = hdr
        # Lines following the hunk header, without trailing newlines.
        self.lines = []
        # True for context-diff hunks, False for unified-diff hunks.
        self.ctx_diff = is_ctx_hunk_start(hdr)

    def dump(self):
        """Print the hunk header followed by the hunk lines."""
        print('%s' % self.hdr)
        print('%s' % '\n'.join(self.lines))

    def is_file_addition(self):
        """Does hunk describe addition of file?"""
        if self.ctx_diff:
            return any(re.match(r'^\*\*\* 0 \*\*\*\*', line)
                       for line in self.lines)
        return bool(re.match(r'^@@ -0,0 \+1.* @@', self.hdr))

    def is_file_removal(self):
        """Does hunk describe removal of file?"""
        if self.ctx_diff:
            return any(re.match(r'^--- 0 ----', line)
                       for line in self.lines)
        return bool(re.match(r'^@@ -1.* \+0,0 @@', self.hdr))


def is_file_diff_start(s):
    """Does S look like the first line of a per-file diff header?"""
    # Don't be fooled by context diff line markers:
    #   *** 385,391 ****
    return ((s.startswith('*** ') and not s.endswith('***'))
            or (s.startswith('--- ') and not s.endswith('---')))


def is_ctx_hunk_start(s):
    """Does S start a context-diff hunk (a row of asterisks)?"""
    return bool(re.match(r'^\*\*\*\*\*\**', s))


def is_uni_hunk_start(s):
    """Does S start a unified-diff hunk ("@@ ... @@")?"""
    return bool(re.match(r'^@@ .* @@', s))


def is_hunk_start(s):
    """Does S start a hunk of either diff flavour?"""
    return is_ctx_hunk_start(s) or is_uni_hunk_start(s)


def remove_suffixes(s):
    """Strip a leading 'a/' or 'b/' and a trailing '.jj' from file name S."""
    if s.startswith('a/') or s.startswith('b/'):
        s = s[2:]
    if s.endswith('.jj'):
        s = s[:-3]
    return s


def find_changed_funs(hunk):
    """Find all functions touched by hunk.  We don't try too hard
    to find good matches.  This should return a superset
    of the actual set of functions in the .diff file.

    Returns a list of names without duplicates, in order of appearance.
    """

    fns = []
    fn = None

    # The hunk header may already name the enclosing function.
    if (cache.match(r'^\*\*\*\*\*\** ([a-zA-Z0-9_].*)', hunk.hdr)
            or cache.match(r'^@@ .* @@ ([a-zA-Z0-9_].*)', hunk.hdr)):
        fn = cache.group(1)

    for line in hunk.lines:
        # Context diffs have extra whitespace after first char;
        # remove it to make matching easier.
        if hunk.ctx_diff:
            line = re.sub(r'^([-+! ]) ', r'\1', line)

        # Remember most recent identifier in hunk
        # that might be a function name.
        if cache.match(r'^[-+! ]([a-zA-Z0-9_#].*)', line):
            fn = cache.group(1)

        change = line and re.match(r'^[-+!][^-]', line)

        # Top-level comment cannot belong to function
        if re.match(r'^[-+! ]\/\*', line):
            fn = None

        if change and fn:
            if cache.match(r'^((class|struct|union|enum)\s+[a-zA-Z0-9_]+)', fn):
                # Struct declaration
                fn = cache.group(1)
            elif cache.search(r'#\s*define\s+([a-zA-Z0-9_]+)', fn):
                # Macro definition
                fn = cache.group(1)
            elif cache.match(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)', fn):
                # Supermacro
                fn = cache.group(1)
            elif cache.search(r'([a-zA-Z_][^()\s]*)\s*\([^*]', fn):
                # Discard template and function parameters.
                fn = cache.group(1)
                fn = re.sub(r'<[^<>]*>', '', fn)
                fn = fn.rstrip()
            else:
                fn = None

            if fn and fn not in fns:  # Avoid dups
                fns.append(fn)

            # The candidate has been consumed by this change.
            fn = None

    return fns


def parse_patch(contents):
    """Parse patch contents to a sequence of FileDiffs.

    CONTENTS is the whole patch as one string.  Each returned FileDiff
    carries the Hunk objects found for that file; a file header without
    any hunk yields a FileDiff with an empty hunk list.
    """

    diffs = []

    lines = contents.split('\n')

    i = 0
    while i < len(lines):
        line = lines[i]

        # Diff headers look like
        #   --- a/gcc/tree.c
        #   +++ b/gcc/tree.c
        # or
        #   *** gcc/cfgexpand.c 2013-12-25 20:07:24.800350058 +0400
        #   --- gcc/cfgexpand.c 2013-12-25 20:06:30.612350178 +0400

        if not is_file_diff_start(line):
            i += 1
            continue

        left = remove_suffixes(re.split(r'\s+', line)[1])

        i += 1
        if i >= len(lines):
            error("expected filename in line %d" % i)
        line = lines[i]

        if not cache.match(r'^[+-][+-][+-] +(\S+)', line):
            error("expected filename in line %d" % i)
        right = remove_suffixes(cache.group(1))

        # Extract real file name from left and right names.
        filename = None
        if left == right:
            filename = left
        elif left == '/dev/null':
            filename = right
        elif right == '/dev/null':
            filename = left
        else:
            # Use the longest common trailing sequence of path components.
            comps = []
            lhs, rhs = left, right
            while lhs and rhs:
                lhs, l = os.path.split(lhs)
                rhs, r = os.path.split(rhs)
                if l != r:
                    break
                comps.append(l)

            if not comps:
                error("failed to extract common name for %s and %s"
                      % (left, right))

            comps.reverse()
            filename = '/'.join(comps)

        d = FileDiff(filename)
        diffs.append(d)

        # Collect hunks for current file.
        hunk = None
        i += 1
        while i < len(lines):
            line = lines[i]

            # Create new hunk when we see hunk header
            if is_hunk_start(line):
                if hunk is not None:
                    d.hunks.append(hunk)
                hunk = Hunk(line)
                i += 1
                continue

            # Stop when we reach next diff
            if (is_file_diff_start(line)
                    or line.startswith('diff ')
                    or line.startswith('Index: ')):
                i -= 1
                break

            if hunk is not None:
                hunk.lines.append(line)
            i += 1

        # Only record a real hunk; a header with no hunks leaves it unset.
        if hunk is not None:
            d.hunks.append(hunk)

    return diffs


def get_pr_from_testcase(line):
    """Return the "PR component/number" text found in LINE, or None."""
    r = pr_regex.search(line)
    if r is not None:
        return r.group(3)
    return None


def main():
    """Command-line entry point: print or prepend a ChangeLog template."""
    help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in Subversion/git).
"""

    inline_message = """\
Prepends ChangeLog to PATCH.
If PATCH is not stdin, modifies PATCH in-place,
otherwise writes to stdout.
"""

    # Parse arguments before anything that may fail, so that --help works
    # even when no user information is configured.
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Verbose messages')
    parser.add_argument('-i', '--inline', action='store_true',
                        help=inline_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None

    name, email = read_user_info()

    if args.input:
        with open(args.input) as patch_file:
            contents = patch_file.read()
    else:
        contents = sys.stdin.read()
    diffs = parse_patch(contents)

    if args.verbose:
        print("Parse results:")
        for d in diffs:
            d.dump()

    # Generate template ChangeLog.

    logs = {}
    prs = []
    for d in diffs:
        log_name = d.clname

        logs.setdefault(log_name, '')
        logs[log_name] += '\t* %s' % d.relname

        change_msg = ''

        # Check if file was removed or added.
        # Two patterns for context and unified diff.
        if len(d.hunks) == 1:
            hunk0 = d.hunks[0]
            if hunk0.is_file_addition():
                if re.search(r'testsuite.*(?<!\.exp)$', d.filename):
                    change_msg = ': New test.\n'
                    if hunk0.lines:
                        pr = get_pr_from_testcase(hunk0.lines[0])
                        if pr and pr not in prs:
                            prs.append(pr)
                else:
                    change_msg = ": New file.\n"
            elif hunk0.is_file_removal():
                change_msg = ": Remove.\n"

        _, ext = os.path.splitext(d.filename)
        if (not change_msg
                and ext in ['.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def']
                and 'testsuite' not in d.filename):
            fns = []
            for hunk in d.hunks:
                for fn in find_changed_funs(hunk):
                    if fn not in fns:
                        fns.append(fn)

            # First function goes on the file's line, the rest on their own.
            for fn in fns:
                if change_msg:
                    change_msg += "\t(%s):\n" % fn
                else:
                    change_msg = " (%s):\n" % fn

        logs[log_name] += change_msg if change_msg else ":\n"

    if args.inline and args.input:
        # Get a temp filename, rather than an open filehandle, because we use
        # the open to truncate.
        fd, tmp = tempfile.mkstemp(prefix='mklog.')
        os.close(fd)

        # Copy permissions to temp file
        shutil.copymode(args.input, tmp)

        # Open the temp file, clearing contents.
        out = open(tmp, 'w')
    else:
        tmp = None
        out = sys.stdout

    # Print log
    date = time.strftime('%Y-%m-%d')
    bugmsg = ''
    if prs:
        bugmsg = '\n'.join(['\t' + pr for pr in prs]) + '\n'

    # The ChangeLog header line separates date, name and email
    # with two spaces each.
    for log_name, msg in sorted(logs.items()):
        out.write("""\
%s:

%s  %s  <%s>

%s%s\n""" % (log_name, date, name, email, bugmsg, msg))

    if args.inline:
        # Append patch body
        out.write(contents)

        if args.input:
            # Replace the patch file with the new contents
            out.close()
            shutil.move(tmp, args.input)


if __name__ == '__main__':
    main()