Home | History | Annotate | Line # | Download | only in contrib
      1      1.1  mrg #!/usr/bin/env python3
      2      1.1  mrg #
      3  1.1.1.2  mrg # Check gcc.pot file for stylistic issues as described in
      4  1.1.1.2  mrg # https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html,
      5  1.1.1.2  mrg # especially in gcc-internal-format messages.
      6      1.1  mrg #
      7      1.1  mrg # This file is part of GCC.
      8      1.1  mrg #
      9      1.1  mrg # GCC is free software; you can redistribute it and/or modify it under
     10      1.1  mrg # the terms of the GNU General Public License as published by the Free
     11      1.1  mrg # Software Foundation; either version 3, or (at your option) any later
     12      1.1  mrg # version.
     13      1.1  mrg #
     14      1.1  mrg # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     15      1.1  mrg # WARRANTY; without even the implied warranty of MERCHANTABILITY or
     16      1.1  mrg # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     17      1.1  mrg # for more details.
     18      1.1  mrg #
     19      1.1  mrg # You should have received a copy of the GNU General Public License
     20      1.1  mrg # along with GCC; see the file COPYING3.  If not see
     21  1.1.1.2  mrg # <http://www.gnu.org/licenses/>.
     22      1.1  mrg 
     23      1.1  mrg import argparse
     24      1.1  mrg import re
     25  1.1.1.2  mrg from collections import Counter
     26  1.1.1.2  mrg from typing import Dict, Match
     27  1.1.1.2  mrg 
     28  1.1.1.2  mrg import polib
     29  1.1.1.2  mrg 
# Counts how often each diagnostic text has been reported by warn();
# main() prints the entries that occurred more than once as a summary.
seen_warnings = Counter()
     31  1.1.1.2  mrg 
     32  1.1.1.2  mrg 
def location(msg: polib.POEntry):
    """
    Returns the first source occurrence of the message, formatted as
    'file:line', or '<unknown location>' if the message does not list
    any occurrence.
    """

    if not msg.occurrences:
        return '<unknown location>'

    first = msg.occurrences[0]
    return f'{first[0]}:{first[1]}'
     38  1.1.1.2  mrg 
     39  1.1.1.2  mrg 
def warn(msg: polib.POEntry,
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints a diagnostic for the message, prefixed by its location, and
    counts the diagnostic text in seen_warnings.  The msgid is appended
    to the output unless include_msgid is false.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    """

    suppression_flag = f'gcclint:ignore:{diagnostic_id}'
    if suppression_flag in msg.flags:
        return

    seen_warnings[diagnostic] += 1

    text = f'{location(msg)}: {diagnostic}'
    if include_msgid:
        text += f' in {msg.msgid!r}'
    print(text)
     56  1.1.1.2  mrg 
     57  1.1.1.2  mrg 
def lint_gcc_internal_format(msg: polib.POEntry):
    """
    Checks a single message that has the gcc-internal-format. These
    messages use a variety of placeholders like %qs, %<quotes%> and
    %q#E.

    Each nested lint_* function implements one check on the msgid
    (some also look at the msgstr of a translated message) and reports
    its findings through warn(). Nothing is returned.
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]):
        """
        Tells whether the match starts outside of %<quotes%>, that is,
        whether the msgid up to the start of the match contains as
        many %< as %>.
        """
        before = msgid[:m.start(0)]
        return before.count('%<') == before.count('%>')

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation. This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = re.findall('%<[^%]+%>', msgid)
        in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)

        # Compared as sets, so order and repetition do not matter.
        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of %<quotes%>
        that look like a command line option (a '-' followed by a
        letter, except for the word -INF) or that start with
        '__builtin_'.
        """

        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                if len(part) >= 2 and part[1].isalpha():
                    if part == '-INF':
                        continue

                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')

            if part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about each apostrophe outside of %<quotes%> that is not
        directly preceded by a percent sign.
        """

        # The lookbehind keeps the match on the apostrophe itself.
        # Matching the preceding character as well would make
        # outside_quotes see only half of a directly preceding %< or
        # %>, and would miss an apostrophe at the very start of the
        # msgid or directly after another apostrophe.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string
        literals that are joined by the C compiler and neither literal
        has a space to separate the words.
        """

        # group(1) is the letter or digit before %<, plus the character
        # before that one, if any; a preceding %s placeholder is fine.
        for match in re.finditer('(.?[a-zA-Z0-9])%<', msgid):
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)

        At most one warning is issued per message.
        """

        for match in re.finditer('_', msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not". These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        in the msgid and, for translated messages, in the msgstr.
        """

        if msgid.count('%<') != msgid.count('%>'):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count('%<') != msg.msgstr.count('%>'):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%>, which can be written in the shorter form %qs.
        The same applies to %s in plain quotes: '%s', "%s" and `%s'.
        Only the first occurrence in the message is reported.
        """
        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()
    206  1.1.1.2  mrg 
    207  1.1.1.2  mrg 
def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):
    """
    Finds messages that become identical once every plain string in
    %<quotes%> is replaced by %qs. Such messages have the same
    structure and can be merged, which prevents copy-and-paste
    mistakes by the translators.

    Each message whose pattern was already seen is reported together
    with the earlier message and its location.

    See bug 90119.
    """

    first_by_pattern: Dict[str, polib.POEntry] = {}

    for entry in po:
        entry: polib.POEntry
        text = entry.msgid
        pattern = re.sub('%<[^%]+%>', '%qs', text)

        if pattern in first_by_pattern:
            earlier = first_by_pattern[pattern]
            warn(entry,
                 'same-pattern',
                 f'same pattern for {text!r} and '
                 f'{earlier.msgid!r} in {location(earlier)}',
                 include_msgid=False)
        else:
            # Remember the entry under its pattern and its own msgid.
            first_by_pattern[pattern] = entry
            first_by_pattern[text] = entry
    235  1.1.1.2  mrg              include_msgid=False)
    236  1.1.1.2  mrg 
    237  1.1.1.2  mrg 
def lint_file(po: polib.POFile):
    """
    Runs the per-message checks on every message that is neither
    obsolete nor fuzzy and carries the gcc-internal-format flag, then
    runs the cross-message check on the whole file.
    """

    for entry in po:
        entry: polib.POEntry

        if entry.obsolete or entry.fuzzy:
            continue
        if 'gcc-internal-format' in entry.flags:
            lint_gcc_internal_format(entry)

    lint_diagnostics_differing_only_in_placeholders(po)
    246  1.1.1.2  mrg     lint_diagnostics_differing_only_in_placeholders(po)
    247  1.1.1.2  mrg 
    248  1.1.1.2  mrg 
def main():
    """
    Reads the file named on the command line, lints it and prints a
    summary of the diagnostic texts that were reported more than once,
    most frequent first.
    """

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('file', help='pot file')
    args = parser.parse_args()

    lint_file(polib.pofile(args.file))

    print()
    print('summary:')
    for diagnostic, count in seen_warnings.most_common():
        if count > 1:
            print(f'{count}\t{diagnostic}')
    263      1.1  mrg 
    264      1.1  mrg 
# Run the linter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
    267