keysym-generator.py revision d63b911f
1#!/usr/bin/env python3
2#
3# SPDX-License-Identifier: MIT
4#
# This script checks XF86keysym.h for the reserved evdev keysym range and/or
# appends new keysyms to that range. An up-to-date libevdev must be
# available to guarantee the correct keycode ranges and names.
8#
9# Run with --help for usage information.
10#
11#
12# File is formatted with Python Black
13
14import argparse
15import logging
16import sys
17import re
18import libevdev
19import subprocess
20from pathlib import Path
21
# The root logger is configured at DEBUG; the level of the "ksgen" logger
# itself is adjusted in main() according to the --verbose count.
logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
logger = logging.getLogger("ksgen")

# Markers delimiting the evdev keysym section of the header: it starts at the
# "#define _EVDEVK" line and ends at the "#undef _EVDEVK" line.
start_token = re.compile(r"#define _EVDEVK.*")
end_token = re.compile(r"#undef _EVDEVK\n")
27
28
def die(msg):
    """
    Log msg at CRITICAL level and terminate the process with exit status 1.
    """
    logger.critical(msg)
    raise SystemExit(1)
32
33
def all_keysyms(directory):
    """
    Extract the key names for all keysyms we have in our repo and return
    them as list.

    Every file in directory (a pathlib.Path) whose name matches "*keysym*.h"
    is scanned for lines of the form "#define NAME 0xVALUE". Defines whose
    value is not a plain hex literal (e.g. the _EVDEVK() ones) are skipped.

    Files are visited in sorted order so the result does not depend on the
    order in which the filesystem lists them.
    """
    pattern = re.compile(r"^#define\s+(?P<name>\w+)\s+(0x[0-9A-Fa-f]+)")
    keysym_names = []
    for path in sorted(directory.glob("*keysym*.h")):
        # Only the ASCII #define lines are of interest, so decode as UTF-8
        # and replace undecodable bytes instead of depending on the locale
        # encoding (which may fail on non-ASCII comments).
        with open(path, encoding="utf-8", errors="replace") as fd:
            for line in fd:
                match = pattern.match(line)
                if match:
                    keysym_names.append(match.group("name"))
    return keysym_names
48
49
class Kernel(object):
    """
    Wrapper around the kernel git tree to simplify searching for when a
    particular keycode was introduced.

    Attributes:
        repo: path of the kernel git checkout, used as the working directory
            for every git command.
        versions: release tags (e.g. "v5.10") as listed by
            "git tag --sort=version:refname", filtered to vX.Y[.Z] names.
    """

    def __init__(self, repo):
        """
        Check that repo is a git checkout on the master branch and collect
        its release tags. Exits the program via die() on any failure.
        """
        self.repo = repo

        exitcode, stdout, stderr = self.git_command("git branch --show-current")
        if exitcode != 0:
            die(f"{stderr}")
        if stdout.strip() != "master":
            die(f"Kernel repo must be on the master branch (current: {stdout.strip()})")

        exitcode, stdout, stderr = self.git_command("git tag --sort=version:refname")
        if exitcode != 0:
            # Without the tag list there is nothing to bisect over
            die(f"{stderr}")
        version_tag = re.compile(r"^v[2-6]\.[0-9]+(\.[0-9]+)?$")
        self.versions = [tag for tag in stdout.split("\n") if version_tag.match(tag)]
        logger.debug(f"Kernel versions: {', '.join(self.versions)}")

    def git_command(self, cmd):
        """
        Takes a single-string git command and runs it in the repo.

        The command is split on single spaces and executed without a shell,
        so arguments must not contain spaces and need no shell quoting.

        Returns the tuple (exitcode, stdout, stderr). Exits the program via
        die() if git exits with code 128 or the command cannot be started.
        """
        try:
            result = subprocess.run(
                cmd.split(" "), cwd=self.repo, capture_output=True, encoding="utf8"
            )
        except (FileNotFoundError, NotADirectoryError):
            # Raised when the repo path does not exist or is not a directory
            # (FileNotFoundError is also raised if the git binary is missing)
            die(f"{self.repo} is not a git repository")

        if result.returncode == 128:
            die(f"{result.stderr}")

        return result.returncode, result.stdout, result.stderr

    @staticmethod
    def _first_match(items, func):
        """
        Return the first element in the sequence items for which func
        returns True, or None if there is no such element (this includes an
        empty sequence).

        Uses a binary search, so func is expected to be False for a
        (possibly empty) prefix of items and True for everything after it.
        """
        if not items:
            return None

        # bias to speed things up: most keycodes will be in the first
        # kernel version
        if func(items[0]):
            return items[0]

        # Index 0 is already known not to match
        lo, hi = 1, len(items)
        while lo < hi:
            mid = (lo + hi) // 2
            if func(items[mid]):
                hi = mid
            else:
                lo = mid + 1
        # lo == len(items) means func was False for every element
        return items[lo] if lo < len(items) else None

    def introduced_in_version(self, string):
        """
        Search this repo for the first version with string in the headers.

        Returns the kernel version number (e.g. "v5.10") or None. None is
        returned both when string is already present in the oldest known
        version and when it is not found in any tagged version.
        """

        # The fastest approach is to git grep every version for the string
        # and return the first. Using git log -G and then git tag --contains
        # is an order of magnitude slower.
        def found_in_version(v):
            # \< and \> are word boundaries so KEY_FOO does not match KEY_FOO2
            cmd = f"git grep -E \\<{string}\\> {v} -- include/"
            exitcode, _, _ = self.git_command(cmd)
            return exitcode == 0

        version = self._first_match(self.versions, found_in_version)
        logger.debug(f"Bisected {string} to {version}")
        # 2.6.11 doesn't count, that's the start of git
        if version is None or version == self.versions[0]:
            return None
        return version
128
129
def generate_keysym_line(code, kernel, kver_list=None):
    """
    Generate the line to append to the keysym file.

    This format is semi-ABI, scripts rely on the format of this line (e.g. in
    xkeyboard-config).

    code is the numeric evdev key code. kernel must provide
    introduced_in_version() (see the Kernel class) and is used to look up the
    kernel version that introduced the code. kver_list optionally restricts
    the output to codes introduced in matching kernel versions; it is either
    a comma-separated string or an iterable of fnmatch-style patterns.

    Returns the "#define ..." line, or None if the code has no #define in
    the kernel, is a BTN_ code, or is filtered out by kver_list.
    """
    evcode = libevdev.evbit(libevdev.EV_KEY.value, code)
    if not evcode.is_defined:  # codes without a #define in the kernel
        return None
    if evcode.name.startswith("BTN_"):
        return None

    # KEY_FOO_BAR -> XF86XK_FooBar
    name = "".join(s.capitalize() for s in evcode.name[4:].lower().split("_"))
    keysym = f"XF86XK_{name}"
    # Pad with tabs towards a common column; always emit at least one tab so
    # that very long names are still separated from _EVDEVK
    tabs = "\t" * max(1, 4 - len(keysym) // 8)
    kver = kernel.introduced_in_version(evcode.name) or " "
    if kver_list:
        from fnmatch import fnmatch

        if isinstance(kver_list, str):
            kver_list = kver_list.split(",")
        if not any(fnmatch(kver, allowed.strip()) for allowed in kver_list):
            return None

    return f"#define {keysym}{tabs}_EVDEVK(0x{code:03X})\t\t/* {kver:5s} {evcode.name} */"
158
159
def verify(ns):
    """
    Verify that the XF86keysym.h file follows the requirements. Since we expect
    the header file to be parsed by outside scripts, the requirements for the format
    are quite strict, including things like correct-case hex codes.

    Only ns.header, the path of the header file, is read from ns; the
    directory containing it is searched for the keysym names that "Use:"
    comments may refer to.

    Every violation is logged as an error and checking continues with the
    next line. Returns 0 if the header passed verification, 1 otherwise.
    """

    # No other keysym must use this range
    reserved_range = re.compile(r"#define.*0x10081.*")
    normal_range = re.compile(r"#define.*0x1008.*")

    # This is the full pattern we expect.
    expected_pattern = re.compile(
        r"#define XF86XK_\w+\t+_EVDEVK\(0x([0-9A-F]{3})\)\t+/\* (v[2-6]\.[0-9]+(\.[0-9]+)?)? +KEY_\w+ \*/"
    )
    # This is the comment pattern we expect
    expected_comment_pattern = re.compile(
        r"/\* Use: (?P<name>\w+)\t+_EVDEVK\(0x(?P<value>[0-9A-F]{3})\)\t+   (v[2-6]\.[0-9]+(\.[0-9]+)?)? +KEY_\w+ \*/"
    )

    # Some patterns to spot specific errors, just so we can print useful errors
    define = re.compile(r"^#define .*")
    name_pattern = re.compile(r"#define (XF86XK_[^\s]*)")
    tab_check = re.compile(r"#define \w+(\s+)[^\s]+(\s+)")
    hex_pattern = re.compile(r".*0x([a-f0-9]+).*", re.I)
    comment_format = re.compile(r".*/\* ([^\s]+)?\s+(\w+)")
    kver_format = re.compile(r"v[2-6]\.[0-9]+(\.[0-9]+)?")

    in_evdev_codes_section = False
    had_evdev_codes_section = False
    success = True

    # Names of all XF86XK_ defines seen so far, to detect duplicates
    all_defines = set()

    all_keysym_names = all_keysyms(ns.header.parent)

    # Build the kernel key name lookup once instead of once per #define line
    known_keynames = {c.name for c in libevdev.EV_KEY.codes}

    class ParserError(Exception):
        pass

    def error(msg, line):
        # Abort processing of the current line; caught at the bottom of the loop
        raise ParserError(f"{msg} in '{line.strip()}'")

    last_keycode = 0
    # Use a context manager so the header is closed again; undecodable bytes
    # are replaced since all patterns we care about are plain ASCII
    with open(ns.header, encoding="utf-8", errors="replace") as fd:
        for line in fd:
            try:
                if not in_evdev_codes_section:
                    if start_token.match(line):
                        in_evdev_codes_section = True
                        had_evdev_codes_section = True
                        continue

                    if reserved_range.match(line):
                        error("Using reserved range", line)
                    match = name_pattern.match(line)
                    if match:
                        all_defines.add(match.group(1))
                else:
                    # Within the evdev defines section
                    if end_token.match(line):
                        in_evdev_codes_section = False
                        continue

                    # Comments we only search for a hex pattern and where there is one present
                    # we only check for uppercase format, ordering and update our last_keycode.
                    if not define.match(line):
                        match = expected_comment_pattern.match(line)
                        if match:
                            hexcode = match.group("value")
                            if hexcode != hexcode.upper():
                                error(f"Hex code 0x{hexcode} must be uppercase", line)
                            if hexcode:
                                keycode = int(hexcode, 16)
                                if keycode < last_keycode:
                                    error("Keycode must be ascending", line)
                                if keycode == last_keycode:
                                    error("Duplicate keycode", line)
                                last_keycode = keycode

                            name = match.group("name")
                            if name not in all_keysym_names:
                                error(f"Unknown keysym {name}", line)
                        elif hex_pattern.match(line):
                            logger.warning(f"Unexpected hex code in {line}")
                        continue

                    # Anything below here is a #define line
                    # Let's check for specific errors
                    if normal_range.match(line):
                        error("Define must use _EVDEVK", line)

                    match = name_pattern.match(line)
                    if match:
                        if match.group(1) in all_defines:
                            error("Duplicate define", line)
                        all_defines.add(match.group(1))
                    else:
                        error("Typo", line)

                    match = hex_pattern.match(line)
                    if not match:
                        error("No hex code", line)
                    if match.group(1) != match.group(1).upper():
                        error(f"Hex code 0x{match.group(1)} must be uppercase", line)

                    tabs = tab_check.match(line)
                    if not tabs:  # bug
                        error("Matching error", line)
                    if " " in tabs.group(1) or " " in tabs.group(2):
                        error("Use tabs, not spaces", line)

                    comment = comment_format.match(line)
                    if not comment:
                        error("Invalid comment format", line)
                    kver = comment.group(1)
                    if kver and not kver_format.match(kver):
                        error("Invalid kernel version format", line)

                    keyname = comment.group(2)
                    if not keyname.startswith("KEY_") or keyname.upper() != keyname:
                        error("Kernel keycode name invalid", line)

                    # This could be an old libevdev
                    if keyname not in known_keynames:
                        logger.warning(f"Unknown kernel keycode name {keyname}")

                    # Check the full expected format, no better error messages
                    # available if this fails
                    match = expected_pattern.match(line)
                    if not match:
                        error("Failed match", line)

                    keycode = int(match.group(1), 16)
                    if keycode < last_keycode:
                        error("Keycode must be ascending", line)
                    if keycode == last_keycode:
                        error("Duplicate keycode", line)

                    # May cause a false positive for old libevdev if KEY_MAX is bumped
                    if keycode < 0x0A0 or keycode > libevdev.EV_KEY.KEY_MAX.value:
                        error("Keycode outside range", line)

                    last_keycode = keycode
            except ParserError as e:
                logger.error(e)
                success = False

    if not had_evdev_codes_section:
        logger.error("Unable to locate EVDEVK section")
        success = False
    elif in_evdev_codes_section:
        logger.error("Unterminated EVDEVK section")
        success = False

    if success:
        logger.info("Verification succeeded")

    return 0 if success else 1
317
318
def add_keysyms(ns):
    """
    Print a new XF86keysym.h file, adding any *missing* keycodes to the existing file.

    Reads ns.header, ns.kernel_git_tree and ns.kernel_versions. The header is
    verified first and the program exits via die() if verification fails.
    The resulting file is written to stdout. Returns 0.
    """
    if verify(ns) != 0:
        die("Header file verification failed")

    # If verification succeeds, we can be a bit more lenient here because we already know
    # what the format of the field is. Specifically, we're searching for
    # 3-digit hexcode in brackets and use that as keycode.
    pattern = re.compile(r".*_EVDEVK\((0x[a-fA-F0-9]{3})\).*")
    # Highest real key code: defined, not the KEY_MAX marker and not a button
    max_code = max(
        c.value
        for c in libevdev.EV_KEY.codes
        if c.is_defined
        and c != libevdev.EV_KEY.KEY_MAX
        and not c.name.startswith("BTN")
    )

    def defined_keycodes(path):
        """
        Returns an iterator to the next #defined (or otherwise mentioned)
        keycode, all other lines (including the returned one) are passed
        through to stdout.

        A line is only printed after its keycode was yielded, so whatever
        the consumer prints in between ends up in front of that line. At the
        end of the evdev section a sentinel one past the highest key code is
        yielded so the consumer can fill in everything up to and including
        that highest key code before the "#undef" line is printed.
        """
        with open(path) as fd:
            in_evdev_codes_section = False

            for line in fd:
                if not in_evdev_codes_section:
                    if start_token.match(line):
                        in_evdev_codes_section = True
                elif end_token.match(line):
                    in_evdev_codes_section = False
                    # The consumer treats the yielded value as an exclusive
                    # upper bound, hence +1 to include max_code itself
                    yield max_code + 1
                else:
                    match = pattern.match(line)
                    if match:
                        logger.debug(f"Found keycode in {line.strip()}")
                        yield int(match.group(1), 16)
                # passthrough of every line, after any yield for it
                print(line, end="")

    kernel = Kernel(ns.kernel_git_tree)
    prev_code = 255 - 8  # the last keycode we can map directly in X
    for code in defined_keycodes(ns.header):
        # Everything strictly between two mentioned keycodes is missing
        for missing in range(prev_code + 1, code):
            newline = generate_keysym_line(
                missing, kernel, kver_list=ns.kernel_versions
            )
            if newline:
                print(newline)
        prev_code = code

    return 0
378
379
def find_xf86keysym_header():
    """
    Locate XF86keysym.h: look recursively below the current working directory
    first and fall back to the system-wide header if the tree has none.
    Exits via die() if the tree contains more than one candidate or if no
    header can be found at all.

    Meant as a convenience for local runs only, the CI should not rely on it.
    """
    candidates = tuple(Path.cwd().glob("**/XF86keysym.h"))
    if len(candidates) > 1:
        die("Multiple XF86keysym.h in CWD, please use --header")

    if candidates:
        path = candidates[0]
    else:
        # Nothing in the tree, last resort is the installed header
        path = Path("/usr/include/X11/XF86keysym.h")
        if not path.exists():
            die("Unable to find XF86keysym.h in CWD or /usr")

    logger.info(f"Using header file {path}")
    return path
398
399
def main():
    """
    Command line entry point: parse the arguments, set the log level from
    --verbose, resolve the header path and run the selected subcommand.

    Exits the process with the subcommand's return value, or with an
    argparse usage error if no subcommand was given.
    """
    parser = argparse.ArgumentParser(description="Keysym parser script")
    parser.add_argument("--verbose", "-v", action="count", default=0)
    parser.add_argument(
        "--header",
        type=str,
        default=None,
        help="Path to the XF86Keysym.h header file (default: search $CWD)",
    )

    subparsers = parser.add_subparsers(help="command-specific help", dest="command")
    parser_verify = subparsers.add_parser(
        "verify", help="Verify the XF86keysym.h matches requirements"
    )
    parser_verify.set_defaults(func=verify)

    parser_generate = subparsers.add_parser(
        "add-keysyms", help="Add missing keysyms to the existing ones"
    )
    parser_generate.add_argument(
        "--kernel-git-tree",
        type=str,
        default=None,
        required=True,
        help="Path to a kernel git repo, required to find git tags",
    )
    parser_generate.add_argument(
        "--kernel-versions",
        type=str,
        default=None,
        required=False,
        help="Comma-separated list of kernel versions to limit ourselves to (e.g. 'v5.10,v5.9'). Supports fnmatch.",
    )
    parser_generate.set_defaults(func=add_keysyms)
    ns = parser.parse_args()

    # Reject a missing subcommand first, before any work (such as searching
    # for the header, which may itself abort) is done
    if ns.command is None:
        parser.error("Invalid or missing command")

    # Anything above -vv is treated like -vv
    logger.setLevel(
        {2: logging.DEBUG, 1: logging.INFO, 0: logging.WARNING}.get(
            ns.verbose, logging.DEBUG
        )
    )

    if not ns.header:
        ns.header = find_xf86keysym_header()
    else:
        ns.header = Path(ns.header)

    sys.exit(ns.func(ns))
449
450
# Only run the command line interface when executed as a script
if __name__ == "__main__":
    main()
453