#!/usr/bin/env python3
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.
18 1.1 mrg
19 1.1 mrg import difflib
20 1.1 mrg import os
21 1.1 mrg import re
22 1.1 mrg import sys
23 1.1 mrg
# Directories recognised as ChangeLog locations.  GitCommit copies this set
# into a per-commit list in init_changelog_locations() and may drop entries
# there depending on the branch being checked.
default_changelog_locations = {
    # top-level and contrib
    'c++tools', 'config', 'contrib', 'contrib/header-tools',
    'contrib/reghunt', 'contrib/regression', 'fixincludes',
    # compiler proper and its front ends
    'gcc', 'gcc/ada', 'gcc/analyzer', 'gcc/brig', 'gcc/c', 'gcc/c-family',
    'gcc/cp', 'gcc/d', 'gcc/fortran', 'gcc/go', 'gcc/jit', 'gcc/lto',
    'gcc/objc', 'gcc/objcp', 'gcc/po', 'gcc/testsuite',
    # tools and shared headers
    'gnattools', 'gotools', 'include', 'intl',
    # libraries
    'libada', 'libatomic', 'libbacktrace', 'libcc1', 'libcody', 'libcpp',
    'libcpp/po', 'libdecnumber', 'libffi', 'libgcc',
    'libgcc/config/avr/libf7', 'libgcc/config/libbid', 'libgfortran',
    'libgomp', 'libhsail-rt', 'libiberty', 'libitm', 'libobjc',
    'liboffloadmic', 'libphobos', 'libquadmath', 'libsanitizer', 'libssp',
    'libstdc++-v3', 'libvtv',
    # remaining top-level directories
    'lto-plugin', 'maintainer-scripts', 'zlib',
}
80 1.1 mrg
# Component names accepted in "PR component/number" references, both in the
# subject line and in ChangeLog PR lines.
bug_components = {
    'ada', 'analyzer', 'boehm-gc', 'bootstrap', 'c', 'c++', 'd', 'debug',
    'demangler', 'driver', 'fastjar', 'fortran', 'gcov-profile', 'go',
    'hsa', 'inline-asm', 'ipa', 'java', 'jit', 'libbacktrace', 'libf2c',
    'libffi', 'libfortran', 'libgcc', 'libgcj', 'libgomp', 'libitm',
    'libobjc', 'libquadmath', 'libstdc++', 'lto', 'middle-end', 'modula2',
    'objc', 'objc++', 'other', 'pch', 'pending', 'plugins', 'preprocessor',
    'regression', 'rtl-optimization', 'sanitizer', 'spam', 'target',
    'testsuite', 'translation', 'tree-optimization', 'web',
}
131 1.1 mrg
# Path prefixes for which a changed file does not have to be mentioned in a
# ChangeLog entry (see GitCommit.in_ignored_location).
ignored_prefixes = {
    'gcc/d/dmd/', 'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/', 'gcc/testsuite/go.test/test/',
    'libffi/', 'libgo/',
    'libphobos/libdruntime/', 'libphobos/src/',
    'libsanitizer/',
}

# Path prefixes below which "dir/*" style wildcard entries are supported.
wildcard_prefixes = {
    'gcc/testsuite/', 'libstdc++-v3/doc/html/', 'libstdc++-v3/testsuite/',
}

# Files that are handled like ChangeLog files: they are expected to be
# updated in commits of their own.
misc_files = {'gcc/DATESTAMP', 'gcc/BASE-VER', 'gcc/DEV-PHASE'}
155 1.1 mrg
# "YYYY-MM-DD", exactly two spaces, then "Name <email>".
author_line_regex = re.compile(
    r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
# A further author below the first one: a tab, optional spaces, then
# "Name <email>".  The number of spaces is validated by the caller.
additional_author_regex = re.compile(
    r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# "[For ]<path>ChangeLog[:]".  Note that "+-/" inside the character class is
# a range, so it covers the characters '+', ',', '-', '.' and '/'.
changelog_regex = re.compile(
    r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/12345" anywhere in the subject line.
subject_pr_regex = re.compile(
    r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# "[PR12345]" or "(PR 12345)" in the subject line.
subject_pr2_regex = re.compile(
    r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# A tab-indented "PR [component/]number" line in the ChangeLog body.
pr_regex = re.compile(
    r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# A tab-indented "DR number" line in the ChangeLog body.
dr_regex = re.compile(
    r'\tDR ([0-9]+)$')
# A tab, an asterisk, optional spaces and the rest of the line.
star_prefix_regex = re.compile(
    r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that terminates the list of file names in an entry.
end_of_location_regex = re.compile(
    r'[\[<(:]')
# An item of the form "(name):" with nothing after the colon.
item_empty_regex = re.compile(
    r'\t(\* \S+ )?\(\S+\):\s*$')
# A line that starts a new file item ("*") or a new "(name):" item.
item_parenthesis_regex = re.compile(
    r'\t(\*|\(\S+\):)')
# The line git adds to the message of a revert commit.
revert_regex = re.compile(
    r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# The line "git cherry-pick -x" adds to the message.
cherry_pick_regex = re.compile(
    r'cherry picked from commit (?P<hash>\w+)')

# Maximum accepted line width, measured with tabs expanded to TAB_WIDTH.
LINE_LIMIT = 100
TAB_WIDTH = 8

# Trailer prefixes, compared against the lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
REVIEW_PREFIXES = (
    'reviewed-by: ',
    'reviewed-on: ',
    'signed-off-by: ',
    'acked-by: ',
    'tested-by: ',
    'reported-by: ',
    'suggested-by: ',
)

# strftime() format used for dates in generated ChangeLog entries.
DATE_FORMAT = '%Y-%m-%d'
179 1.1 mrg
180 1.1 mrg
def decode_path(path):
    """Return PATH with git's C-style quoting undone.

    When core.quotepath is true (the default), git wraps paths that
    contain unusual characters in double quotes and writes non-ASCII
    bytes as octal escapes, for example:

        "b/ko\\304\\215ka.txt"

    The upstream GitPython bug is fixed:
    https://github.com/gitpython-developers/GitPython/issues/1099
    but a workaround is still needed for older versions of the library.
    The transformation is explained here:
    https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    A path that is not wrapped in a pair of double quotes is returned
    unchanged.
    """
    if len(path) >= 2 and path.startswith('"') and path.endswith('"'):
        # Remove exactly one quote from each end.  str.strip('"') would
        # also eat an escaped quote that is part of the file name and
        # leave a dangling backslash behind.
        quoted = path[1:-1]
        # unicode-escape turns each escape into a code point below 256;
        # latin-1 maps those back to the raw bytes, which are UTF-8.
        return (quoted.encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    return path
197 1.1 mrg
198 1.1 mrg
class Error:
    """One problem found while checking a commit.

    MESSAGE is the description, LINE the optional offending text and
    DETAILS optional extra information (stored, not part of the repr).
    """

    def __init__(self, message, line=None, details=None):
        self.message, self.line, self.details = message, line, details

    def __repr__(self):
        # The offending text is only shown when there is one.
        if not self.line:
            return self.message
        return self.message + ': "%s"' % self.line
210 1.1 mrg
211 1.1 mrg
class ChangeLogEntry:
    """The part of a commit message that belongs to one ChangeLog file."""

    def __init__(self, folder, authors, prs):
        # FOLDER may be None; the owner deduces it later in that case.
        self.folder = folder
        # Private copies, so that changes to this entry do not leak into
        # the sequences that were passed in.
        self.author_lines = [*authors]
        self.initial_prs = [*prs]
        self.prs = [*prs]
        self.lines = []
        self.files = []
        self.file_patterns = []
        self.parentheses_stack = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # True while we are between a "\t* " prefix and the character
        # that ends the list of file names (see end_of_location_regex).
        inside_location = False

        for text in self.lines:
            # Only a star followed by exactly one space starts a file
            # list.  This skips macro names that may look like:
            #
            #   * config/i386/i386.md (*fix_trunc<mode>_i387_1,
            #   *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
            #   *fist<mode>2_<rounding>_1, *<code><mode>3_1):
            #
            star = star_prefix_regex.match(text)
            if star is not None and len(star.group('spaces')) == 1:
                inside_location = True
                text = star.group('content')

            if not inside_location:
                continue

            # Cut off everything that is not a file name: "(NAME)",
            # "<PATTERN>", "[COND]" and the colon with the text after it.
            terminator = end_of_location_regex.search(text)
            if terminator is not None:
                text = text[:terminator.start()]
                inside_location = False

            # What remains is a comma separated list of file names.
            for piece in text.split(','):
                name = piece.strip()
                if not name:
                    continue
                if name.endswith('*'):
                    self.file_patterns.append(name[:-1])
                else:
                    self.files.append(name)

    @property
    def datetime(self):
        """First non-empty date among the author lines, or None."""
        return next((item[1] for item in self.author_lines if item[1]), None)

    @property
    def authors(self):
        """Names of all authors, as a new list."""
        return [item[0] for item in self.author_lines]

    @property
    def is_empty(self):
        """True when no line was added and the PR list is untouched."""
        if self.lines:
            return False
        return self.prs == self.initial_prs

    def contains_author(self, author):
        """Tell whether AUTHOR is already one of the entry's authors."""
        return any(item[0] == author for item in self.author_lines)
283 1.1 mrg
284 1.1 mrg
class GitInfo:
    """Plain record with the data of one git commit used by GitCommit."""

    def __init__(self, hexsha, date, author, lines, modified_files):
        # Commit hash.
        self.hexsha = hexsha
        # Commit date; GitCommit calls strftime() on it.
        self.date = date
        # Author string; GitCommit takes the last space separated word
        # as the e-mail address.
        self.author = author
        # Lines of the commit message, subject line first.
        self.lines = lines
        # Sequence of (path, status) pairs.
        self.modified_files = modified_files
292 1.1 mrg
293 1.1 mrg
class GitCommit:
    """Check the ChangeLog part of one commit and render ChangeLog entries.

    All checks run from the constructor.  Problems are collected as Error
    objects in self.errors; the parsed entries end up in
    self.changelog_entries.
    """

    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
        """Parse and verify the commit described by INFO.

        INFO provides .lines (commit message, subject first), .author,
        .date and .modified_files (a sequence of (path, status) pairs).
        COMMIT_TO_INFO_HOOK, when given, is called with a commit hash and
        returns the info object of that commit or a false value.  It is
        used for reverted and cherry-picked commits.
        REF_NAME selects the set of valid ChangeLog locations, see
        init_changelog_locations.
        """
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.subject_prs = set()
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook
        self.init_changelog_locations(ref_name)

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
            self.errors.append(Error('Expected empty second line in commit message',
                                     info.lines[0]))

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.fullmatch(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # A revert is checked against the message of the reverted
            # commit.  Without a hook that commit cannot be looked up, so
            # behave as if the hook had found nothing.
            hook = self.commit_to_info_hook
            self.info = hook(self.revert_commit) if hook else None

        # The following happens for get_email.py:
        if not self.info:
            return

        self.check_commit_email()

        # Extract PR numbers from the subject line
        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
        if self.info.lines and not self.revert_commit:
            subject = info.lines[0]
            self.subject_prs = {m.group('pr')
                                for m in subject_pr2_regex.finditer(subject)}
            for m in subject_pr_regex.finditer(subject):
                if m.group('component') not in bug_components:
                    self.errors.append(Error('invalid PR component in subject',
                                             subject))
                self.subject_prs.add(m.group('pr'))

        # Allow complete deletion of ChangeLog files in a commit
        project_files = [f for f in self.info.modified_files
                         if (self.is_changelog_filename(f[0], allow_suffix=True)
                             and f[1] != 'D')
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files:
            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
                  'should be done separately from normal commits\n' \
                  '(note: ChangeLog entries will be automatically ' \
                  'added by a cron job)'
            self.errors.append(Error(err))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.check_for_broken_parentheses()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            # The file checks rely on every entry having a folder, which
            # is only guaranteed when nothing went wrong so far.
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()
            # parse_changelog removed every PR it found in the body.
            if self.subject_prs:
                # Sorted, so that the message does not depend on set order.
                self.errors.append(Error('PR %s in subject but not in changelog' %
                                         ', '.join(sorted(self.subject_prs)),
                                         self.info.lines[0]))

    @property
    def success(self):
        """True when no error was recorded."""
        return not self.errors

    @property
    def new_files(self):
        """Paths of all files with status 'A'."""
        return [x[0] for x in self.info.modified_files if x[1] == 'A']

    @classmethod
    def is_changelog_filename(cls, path, allow_suffix=False):
        """Tell whether the base name of PATH is 'ChangeLog'.

        With ALLOW_SUFFIX, any base name starting with 'ChangeLog' counts.
        """
        basename = os.path.basename(path)
        if basename == 'ChangeLog':
            return True
        return bool(allow_suffix and basename.startswith('ChangeLog'))

    def find_changelog_location(self, name):
        """Return NAME as a ChangeLog location, or None if it is not one.

        One leading tab, one trailing ':' and one trailing '/' are removed
        before the lookup.
        """
        if name.startswith('\t'):
            name = name[1:]
        if name.endswith(':'):
            name = name[:-1]
        if name.endswith('/'):
            name = name[:-1]
        return name if name in self.changelog_locations else None

    @classmethod
    def format_git_author(cls, author):
        """Insert an extra space in front of the '<' of AUTHOR."""
        assert '<' in author
        return author.replace('<', ' <')

    @classmethod
    def parse_git_name_status(cls, string):
        """Convert 'git ... --name-status' output to (path, status) pairs.

        'A', 'D' and 'M' records are kept as they are.  A rename record is
        reported as a deletion of the old path plus an addition of the new
        one.  Records with any other status are dropped.
        """
        modified_files = []
        for entry in string.split('\n'):
            parts = entry.split('\t')
            t = parts[0]
            if t in ('A', 'D', 'M'):
                modified_files.append((parts[1], t))
            elif t.startswith('R'):
                modified_files.append((parts[1], 'D'))
                modified_files.append((parts[2], 'A'))
        return modified_files

    def init_changelog_locations(self, ref_name):
        """Set self.changelog_locations for the branch named REF_NAME.

        Without REF_NAME the default locations are used unchanged.
        """
        self.changelog_locations = list(default_changelog_locations)
        if ref_name:
            # Anything that is not a release branch counts as newest.
            version = sys.maxsize
            if 'releases/gcc-' in ref_name:
                version = int(ref_name.split('-')[-1])
            if version >= 12:
                # HSA and BRIG were removed in GCC 12
                self.changelog_locations.remove('gcc/brig')
                self.changelog_locations.remove('libhsail-rt')

    def parse_lines(self, all_are_ignored):
        """Find where the ChangeLog part of the message starts.

        self.changes becomes the message from the first line that looks
        like ChangeLog content.  When there is no such line, an error is
        recorded unless ALL_ARE_IGNORED is true.
        """
        body = self.info.lines

        for i, b in enumerate(body):
            if not b:
                continue
            if (changelog_regex.match(b) or self.find_changelog_location(b)
                    or star_prefix_regex.match(b) or pr_regex.match(b)
                    or dr_regex.match(b) or author_line_regex.match(b)
                    or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
                self.changes = body[i:]
                return
        if not all_are_ignored:
            self.errors.append(Error('cannot find a ChangeLog location in '
                                     'message'))

    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Also collects top-level authors and PRs, Co-Authored-By trailers
        and the cherry-pick hash, and records formatting errors.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line ends an entry whose location is unknown.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if (not line.startswith('\t* ') or not line.endswith(':')
                        or ' ' in line[3:-1]):
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif component[:-1] not in bug_components:
                        # component still carries its trailing '/'
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # This PR of the subject line is covered by the body.
                    self.subject_prs.discard(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # Inside an open parenthesis a star may belong to
                        # a name, so its spacing is not checked there.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)

    def process_parentheses(self, last_entry, line):
        """Update the stack of open parentheses of LAST_ENTRY with LINE.

        LINE is pushed once for every '(' and the top is popped for every
        ')'.  A ')' without an open '(' is recorded as an error.
        """
        for c in line:
            if c == '(':
                last_entry.parentheses_stack.append(line)
            elif c == ')':
                if not last_entry.parentheses_stack:
                    msg = 'bad wrapping of parenthesis'
                    self.errors.append(Error(msg, line))
                else:
                    del last_entry.parentheses_stack[-1]

    def parse_file_names(self):
        """Let every entry extract its file names and patterns."""
        for entry in self.changelog_entries:
            entry.parse_file_names()

    def check_file_patterns(self):
        """Report wildcard patterns outside of wildcard_prefixes."""
        for entry in self.changelog_entries:
            if entry.folder is None:
                # The location could not be deduced, so there is no path
                # to check.  When run from __init__,
                # deduce_changelog_locations has recorded an error.
                continue
            for pattern in entry.file_patterns:
                name = os.path.join(entry.folder, pattern)
                # any() is essential: a non-empty list of booleans is
                # always true, whatever it contains.
                if not any(name.startswith(pr) for pr in wildcard_prefixes):
                    msg = 'unsupported wildcard prefix'
                    self.errors.append(Error(msg, name))

    def check_for_empty_description(self):
        """Report '(name):' items that are not followed by any text."""
        for entry in self.changelog_entries:
            for i, line in enumerate(entry.lines):
                # Empty means: last line of the entry, followed by a blank
                # line, or followed directly by the next item.
                if (item_empty_regex.match(line) and
                    (i == len(entry.lines) - 1
                     or not entry.lines[i+1].strip()
                     or item_parenthesis_regex.match(entry.lines[i+1]))):
                    msg = 'missing description of a change'
                    self.errors.append(Error(msg, line))

    def check_for_broken_parentheses(self):
        """Report entries that end with an unclosed parenthesis."""
        for entry in self.changelog_entries:
            if entry.parentheses_stack:
                msg = 'bad parentheses wrapping'
                self.errors.append(Error(msg, entry.parentheses_stack[-1]))

    def get_file_changelog_location(self, changelog_file):
        """Return the directory prefix under which CHANGELOG_FILE changed.

        The result is '' when a modified path equals CHANGELOG_FILE, the
        part of the first modified path in front of '/' + CHANGELOG_FILE
        when there is one, and None otherwise.
        """
        for file in self.info.modified_files:
            if file[0] == changelog_file:
                # root ChangeLog file
                return ''
            index = file[0].find('/' + changelog_file)
            if index != -1:
                return file[0][:index]
        return None

    def deduce_changelog_locations(self):
        """Set the folder of entries that were written without one.

        The folder is taken from the modified files that match the files
        mentioned in the entry.  An error is recorded when no location or
        more than one location is found; in the latter case the remaining
        entries are not processed.
        """
        for entry in self.changelog_entries:
            if not entry.folder:
                changelog = None
                for file in entry.files:
                    location = self.get_file_changelog_location(file)
                    if (location == ''
                       or (location and location in self.changelog_locations)):
                        if changelog and changelog != location:
                            msg = 'could not deduce ChangeLog file, ' \
                                  'not unique location'
                            self.errors.append(Error(msg))
                            return
                        changelog = location
                if changelog is not None:
                    entry.folder = changelog
                else:
                    msg = 'could not deduce ChangeLog file'
                    self.errors.append(Error(msg))

    @classmethod
    def in_ignored_location(cls, path):
        """Tell whether PATH starts with one of ignored_prefixes."""
        return any(path.startswith(ignored) for ignored in ignored_prefixes)

    def get_changelog_by_path(self, path):
        """Return the deepest ChangeLog location that contains PATH.

        The result is '' when no parent directory is a known location.
        """
        components = path.split('/')
        while components:
            if '/'.join(components) in self.changelog_locations:
                break
            components = components[:-1]
        return '/'.join(components)

    def check_mentioned_files(self):
        """Compare the files named in the entries with the changed files.

        Errors are recorded for files that are mentioned twice, mentioned
        but unchanged, or changed but not mentioned, and for wildcard
        patterns that match nothing.  New files below a ChangeLog
        location are not an error: a 'New file.' line is added for them.
        Every entry must have a folder when this is called.
        """
        assert all(e.folder is not None for e in self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        changed_files = {x[0] for x in self.info.modified_files
                         if not self.is_changelog_filename(x[0])}
        # Sorted, so that the suggestion does not depend on set order.
        sorted_changed = sorted(changed_files)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            candidates = difflib.get_close_matches(file, sorted_changed, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                diff = difflib.Differ().compare([file], [candidates[0]])
                details = '\n'.join(diff).rstrip()
            self.errors.append(Error(msg, file, details))

        new_files = set(self.new_files)
        for file in sorted(changed_files - mentioned_files):
            if self.in_ignored_location(file):
                continue
            if file in new_files:
                changelog_location = self.get_changelog_by_path(file)
                entry = next((e for e in self.changelog_entries
                              if e.folder == changelog_location), None)
                if entry is None:
                    prs = self.top_level_prs
                    # There may be no entry at all, e.g. when the message
                    # consists of trailers only.
                    if not prs and self.changelog_entries:
                        # if all ChangeLog entries have identical PRs
                        # then use them
                        first_prs = self.changelog_entries[0].prs
                        if all(e.prs == first_prs
                               for e in self.changelog_entries):
                            prs = first_prs
                        else:
                            prs = []
                    entry = ChangeLogEntry(changelog_location,
                                           self.top_level_authors,
                                           prs)
                    self.changelog_entries.append(entry)
                # strip prefix of the file
                assert file.startswith(entry.folder)
                # do not allow auto-addition of New files
                # for the top-level folder
                if entry.folder:
                    file = file[len(entry.folder):].lstrip('/')
                    entry.lines.append('\t* %s: New file.' % file)
                    entry.files.append(file)
                else:
                    msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                    self.errors.append(Error(msg, file))
            else:
                used_pattern = [p for p in mentioned_patterns
                                if file.startswith(p)]
                used_pattern = used_pattern[0] if used_pattern else None
                if used_pattern:
                    used_patterns.add(used_pattern)
                else:
                    msg = 'changed file not mentioned in a ChangeLog'
                    self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))

    def check_for_correct_changelog(self):
        """Report files that belong to a deeper ChangeLog location."""
        for entry in self.changelog_entries:
            for file in entry.files:
                full_path = os.path.join(entry.folder, file)
                changelog_location = self.get_changelog_by_path(full_path)
                if changelog_location != entry.folder:
                    msg = 'wrong ChangeLog location "%s", should be "%s"'
                    err = Error(msg % (entry.folder, changelog_location), file)
                    self.errors.append(err)

    @classmethod
    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
        """Return the author header of a ChangeLog entry.

        The first author follows TIMESTAMP after two spaces; every other
        author gets a line of its own, indented with a tab and four
        spaces.  This is the layout author_line_regex and the additional
        author check in parse_changelog accept.  PREFIX is put in front
        of every line and an empty line ends the header.
        """
        lines = []
        for i, author in enumerate(authors):
            if i == 0:
                lines.append('%s%s  %s\n' % (prefix, timestamp, author))
            else:
                lines.append('%s\t    %s\n' % (prefix, author))
        lines.append('\n')
        return ''.join(lines)

    def to_changelog_entries(self, use_commit_ts=False):
        """Yield a (folder, text) pair for every ChangeLog entry.

        The date of an entry is the one written in the message or, when
        there is none or USE_COMMIT_TS is set, the commit date.  Reverts
        and cherry-picks get an outer header with the author and date of
        the checked commit, followed by 'Revert:' or
        'Backported from master:' and the indented original header.
        """
        # For a revert, self.info describes the reverted commit and
        # self.original_info the revert itself.  Both dates are computed
        # up front so that every entry gets the same pair.
        commit_timestamp = self.info.date.strftime(DATE_FORMAT)
        if self.revert_commit:
            header_timestamp = self.original_info.date.strftime(DATE_FORMAT)
        else:
            header_timestamp = commit_timestamp
        # Looked up on first use only; the hook may be expensive.
        cherry_pick_timestamp = None

        for entry in self.changelog_entries:
            timestamp = entry.datetime
            if self.revert_commit:
                timestamp = commit_timestamp
            elif self.cherry_pick_commit:
                if cherry_pick_timestamp is None:
                    hook = self.commit_to_info_hook
                    picked = hook(self.cherry_pick_commit) if hook else None
                    # it can happen that it is a cherry-pick for a
                    # different repository
                    if picked:
                        cherry_pick_timestamp = picked.date.strftime(DATE_FORMAT)
                    else:
                        cherry_pick_timestamp = commit_timestamp
                timestamp = cherry_pick_timestamp
            elif not timestamp or use_commit_ts:
                timestamp = commit_timestamp

            # A copy, so that adding co-authors never changes the entry.
            authors = list(entry.authors) or [self.info.author]
            # add Co-Authored-By authors to all ChangeLog entries
            for author in self.co_authors:
                if author not in authors:
                    authors.append(author)

            if self.cherry_pick_commit or self.revert_commit:
                original_author = self.original_info.author
                output = self.format_authors_in_changelog([original_author],
                                                          header_timestamp)
                if self.revert_commit:
                    output += '\tRevert:\n'
                else:
                    output += '\tBackported from master:\n'
                output += self.format_authors_in_changelog(authors,
                                                           timestamp, '\t')
            else:
                output = self.format_authors_in_changelog(authors, timestamp)
            output += ''.join('\t%s\n' % pr for pr in entry.prs)
            output += ''.join(line + '\n' for line in entry.lines)
            yield (entry.folder, output.rstrip())

    def print_output(self):
        """Print every generated ChangeLog entry below a header line."""
        for entry, output in self.to_changelog_entries():
            print('------ %s/ChangeLog ------ ' % entry)
            print(output)

    def print_errors(self):
        """Print all recorded errors, one per line."""
        print('Errors:')
        for error in self.errors:
            print(error)

    def check_commit_email(self):
        """Record an error when the author's e-mail is not pure ASCII."""
        # The address is the last word of 'Name <user@example.com>'.
        email = self.info.author.split(' ')[-1].strip('<>')

        # Verify that all characters are ASCII: only then does the UTF-8
        # encoding have as many bytes as the string has characters.
        # TODO: Python 3.7 provides a nicer function: isascii
        if len(email) != len(email.encode()):
            self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))
827