#!/usr/bin/env python3
#
# Check gcc.pot file for stylistic issues as described in
# https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html,
# especially in gcc-internal-format messages.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

import argparse
import re
from collections import Counter
from typing import Dict, Match

try:
    import polib
except ImportError:
    # polib is only needed to parse the input file.  Deferring the failure
    # to main() keeps "--help" usable and lets the individual checks be
    # imported without the third-party package; main() reports the problem.
    polib = None

# Number of times each diagnostic text has been emitted, for the summary.
seen_warnings = Counter()

# A literal string wrapped in %<quotes%>.
_QUOTED_LITERAL = re.compile('%<[^%]+%>')


def location(msg: 'polib.POEntry') -> str:
    """
    Returns 'file:line' for the first recorded occurrence of the message,
    or '<unknown location>' if the message has no occurrences.
    """

    if msg.occurrences:
        occ = msg.occurrences[0]
        return f'{occ[0]}:{occ[1]}'
    return '<unknown location>'


def warn(msg: 'polib.POEntry',
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints a diagnostic for the message and counts it in seen_warnings.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    """

    if f'gcclint:ignore:{diagnostic_id}' in msg.flags:
        return

    seen_warnings[diagnostic] += 1

    if include_msgid:
        print(f'{location(msg)}: {diagnostic} in {repr(msg.msgid)}')
    else:
        print(f'{location(msg)}: {diagnostic}')


def lint_gcc_internal_format(msg: 'polib.POEntry'):
    """
    Checks a single message that has the gcc-internal-format.  These
    messages use a variety of placeholders like %qs, %<quotes%> and
    %q#E.
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]):
        """
        Tells whether the match starts outside of %<quotes%>, that is,
        whether as many %< as %> precede the start of the match.
        """
        before = msgid[:m.start(0)]
        return before.count('%<') == before.count('%>')

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation.  This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = _QUOTED_LITERAL.findall(msgid)
        in_msgstr = _QUOTED_LITERAL.findall(msg.msgstr)

        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of %<quotes%> that
        look like a command line option (a dash followed by a letter,
        except for -INF) or that start with __builtin_.
        """

        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                if len(part) >= 2 and part[1].isalpha():
                    if part == '-INF':
                        continue

                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')

            if part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about each apostrophe outside of %<quotes%> that is not
        written as %'.
        """

        # Match the apostrophe itself (look-behind instead of consuming the
        # preceding character), so that an apostrophe at the very start of
        # the message is found and outside_quotes() is evaluated at the
        # apostrophe rather than one character earlier, which went wrong
        # directly after %< or %>.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string literals
        that are joined by the C compiler and neither literal has a space
        to separate the words.
        """

        for match in re.finditer('(.?[a-zA-Z0-9])%<', msgid):
            # The placeholder %s directly before %< is not a word.
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)

        At most one warning is issued per message.
        """

        for match in re.finditer('_', msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not".  These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        in the msgid and, for translated messages, in the msgstr.
        """

        if msgid.count('%<') != msgid.count('%>'):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count('%<') != msg.msgstr.count('%>'):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%>, which can be written in the shorter form %qs.
        """
        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()


def lint_diagnostics_differing_only_in_placeholders(po: 'polib.POFile'):
    """
    Detects messages that are structurally the same, except that they
    use different plain strings inside %<quotes%>.  These messages can
    be merged in order to prevent copy-and-paste mistakes by the
    translators.

    Obsolete messages are ignored, as in lint_file.

    See bug 90119.
    """

    seen: Dict[str, 'polib.POEntry'] = {}

    for msg in po:
        msg: 'polib.POEntry'

        if msg.obsolete:
            continue

        msgid = msg.msgid

        normalized = _QUOTED_LITERAL.sub('%qs', msgid)
        if normalized not in seen:
            # Remember the message under its raw msgid as well, so that a
            # later message whose normalized form equals this msgid is
            # reported too.
            seen[normalized] = msg
            seen[msgid] = msg
            continue

        prev = seen[normalized]
        warn(msg,
             'same-pattern',
             f'same pattern for {repr(msgid)} and '
             f'{repr(prev.msgid)} in {location(prev)}',
             include_msgid=False)


def lint_file(po: 'polib.POFile'):
    """
    Runs the per-message checks on every gcc-internal-format message that
    is neither obsolete nor fuzzy, then the cross-message check.
    """

    for msg in po:
        msg: 'polib.POEntry'

        if not msg.obsolete and not msg.fuzzy:
            if 'gcc-internal-format' in msg.flags:
                lint_gcc_internal_format(msg)

    lint_diagnostics_differing_only_in_placeholders(po)


def main():
    """
    Lints the file named on the command line and prints a summary of the
    diagnostics that were issued more than once.
    """

    parser = argparse.ArgumentParser(
        description='Check a gcc.pot file for stylistic issues '
                    'in diagnostic messages.')
    parser.add_argument('file', help='pot file')

    args = parser.parse_args()

    if polib is None:
        raise SystemExit('error: the Python module "polib" is required')

    po = polib.pofile(args.file)
    lint_file(po)

    print()
    print('summary:')
    for diagnostic, count in seen_warnings.most_common():
        if count > 1:
            print(f'{count}\t{diagnostic}')


if __name__ == '__main__':
    main()