keysym-generator.py revision 5395ea76
1#!/usr/bin/env python3
2#
3# SPDX-License-Identifier: MIT
4#
# This script checks XF86keysym.h for the reserved evdev keysym range and/or
# appends new keysyms to that range. An up-to-date libevdev must be
# available to guarantee the correct keycode ranges and names.
8#
9# Run with --help for usage information.
10#
11#
12# File is formatted with Python Black
13
14import argparse
15import logging
16import os
17import sys
18import re
19import subprocess
20from pathlib import Path
21
try:
    import libevdev
except ModuleNotFoundError as e:
    # Both lines are diagnostics, so both go to stderr; stdout is reserved for
    # the generated header (see add_keysyms) and must not be polluted.
    print(f"Error: {e}", file=sys.stderr)
    print(
        "One or more python modules are missing. Please install those "
        "modules and re-run this tool.",
        file=sys.stderr,
    )
    # NOTE(review): 77 is presumably the "test skipped" exit code understood
    # by automake/meson test harnesses - confirm against the build setup.
    sys.exit(77)
31
32
# All diagnostics go through the "ksgen" logger, prefixed with their level
# name. The effective level is adjusted later in main() based on --verbose.
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG)
logger = logging.getLogger("ksgen")

# Markers delimiting the evdev keysym section of the header: the section
# starts at the _EVDEVK macro definition and ends where it is #undef'd again.
start_token = re.compile(r"#define _EVDEVK.*")
end_token = re.compile(r"#undef _EVDEVK\n")
38
39
def die(msg):
    """
    Log msg at CRITICAL level and terminate the script with exit code 1.
    """
    logger.critical(msg)
    raise SystemExit(1)
43
44
def all_keysyms(directory):
    """
    Collect the names of all keysyms defined in the headers of our repo.

    Every file in directory matching "*keysym*.h" is scanned for lines of
    the form "#define <name> <hex value>"; the names are returned as a list
    in the order they are encountered.
    """
    define_re = re.compile(r"^#define\s+(?P<name>\w+)\s+(0x[0-9A-Fa-f]+)")
    names = []
    for header in directory.glob("*keysym*.h"):
        with open(header) as fd:
            hits = (define_re.match(text) for text in fd)
            names.extend(hit.group("name") for hit in hits if hit)
    return names
59
60
class Kernel:
    """
    Wrapper around the kernel git tree to simplify searching for when a
    particular keycode was introduced.
    """

    def __init__(self, repo):
        """
        Validate the git tree at repo and collect its release tags.

        The tree must have the master branch checked out. If it does not, if
        git fails, or if no release tag can be found, the script exits via
        die().
        """
        self.repo = repo

        exitcode, stdout, stderr = self.git_command("git branch --show-current")
        if exitcode != 0:
            die(f"{stderr}")
        if stdout.strip() != "master":
            die(f"Kernel repo must be on the master branch (current: {stdout.strip()})")

        exitcode, stdout, stderr = self.git_command("git tag --sort=version:refname")
        if exitcode != 0:
            die(f"{stderr}")

        # Only release tags (vX.Y or vX.Y.Z), sorted oldest to newest by git
        release_tag = re.compile(r"^v[2-6]\.[0-9]+(\.[0-9]+)?$")
        self.versions = [tag for tag in stdout.split("\n") if release_tag.match(tag)]
        if not self.versions:
            # Without tags there is nothing to bisect over
            die(f"No kernel release tags found in {self.repo}")
        logger.debug(f"Kernel versions: {', '.join(self.versions)}")

    def git_command(self, cmd):
        """
        Takes a single-string git command and runs it in the repo.

        The command is split on single spaces, so arguments must not contain
        spaces themselves. An exit code of 128 from git terminates the script
        via die().

        Returns the tuple (exitcode, stdout, stderr)
        """
        # logger.debug(f"git command: {cmd}")
        try:
            result = subprocess.run(
                cmd.split(" "), cwd=self.repo, capture_output=True, encoding="utf8"
            )
            if result.returncode == 128:
                die(f"{result.stderr}")

            return result.returncode, result.stdout, result.stderr
        except FileNotFoundError:
            die(f"{self.repo} is not a git repository")

    @staticmethod
    def _first_true(items, predicate):
        """
        Binary-search items for the first element for which predicate returns
        True, assuming predicate is False for a (possibly empty) prefix and
        True afterwards.

        Returns that element, or None if items is empty or predicate is not
        True for any probed element.
        """
        if not items:
            return None

        # bias to speed things up: most keycodes will be in the first
        # kernel version
        if predicate(items[0]):
            return items[0]

        lo, hi = 0, len(items)
        while lo < hi:
            mid = (lo + hi) // 2
            if predicate(items[mid]):
                hi = mid
            else:
                lo = mid + 1
        # hi only stays at len(items) if nothing matched; indexing with it
        # would raise IndexError
        return items[hi] if hi < len(items) else None

    def introduced_in_version(self, string):
        """
        Search this repo for the first version with string in the headers.

        Returns the kernel version number (e.g. "v5.10") or None if the string
        is not found in any version or was already present in the oldest one.
        """

        # The fastest approach is to git grep every version for the string
        # and return the first. Using git log -G and then git tag --contains
        # is an order of magnitude slower.
        def found_in_version(v):
            # \< and \> are word boundaries so KEY_FOO does not match KEY_FOOBAR
            cmd = f"git grep -E \\<{string}\\> {v} -- include/"
            exitcode, _, _ = self.git_command(cmd)
            return exitcode == 0

        version = self._first_true(self.versions, found_in_version)
        logger.debug(f"Bisected {string} to {version}")
        # 2.6.11 doesn't count, that's the start of git
        if version is None or version == self.versions[0]:
            return None
        return version
139
140
141def generate_keysym_line(code, kernel, kver_list=[]):
142    """
143    Generate the line to append to the keysym file.
144
145    This format is semi-ABI, scripts rely on the format of this line (e.g. in
146    xkeyboard-config).
147    """
148    evcode = libevdev.evbit(libevdev.EV_KEY.value, code)
149    if not evcode.is_defined:  # codes without a #define in the kernel
150        return None
151    if evcode.name.startswith("BTN_"):
152        return None
153
154    name = "".join([s.capitalize() for s in evcode.name[4:].lower().split("_")])
155    keysym = f"XF86XK_{name}"
156    tabs = 4 - len(keysym) // 8
157    kver = kernel.introduced_in_version(evcode.name) or " "
158    if kver_list:
159        from fnmatch import fnmatch
160
161        allowed_kvers = [v.strip() for v in kver_list.split(",")]
162        for allowed in allowed_kvers:
163            if fnmatch(kver, allowed):
164                break
165        else:  # no match
166            return None
167
168    return f"#define {keysym}{'	' * tabs}_EVDEVK(0x{code:03X})		/* {kver:5s} {evcode.name} */"
169
170
def verify(ns):
    """
    Verify that the XF86keysym.h file follows the requirements. Since we expect
    the header file to be parsed by outside scripts, the requirements for the format
    are quite strict, including things like correct-case hex codes.

    ns is the parsed command line; only ns.header (a Path) is used. Every
    violation is logged as an error and checking continues with the next line.

    Returns 0 if the header passed all checks, 1 otherwise.
    """

    # No other keysym must use this range
    reserved_range = re.compile(r"#define.*0x10081.*")
    normal_range = re.compile(r"#define.*0x1008.*")

    # This is the full pattern we expect.
    expected_pattern = re.compile(
        r"#define XF86XK_\w+\t+_EVDEVK\(0x([0-9A-F]{3})\)\t+/\* (v[2-6]\.[0-9]+(\.[0-9]+)?)? +KEY_\w+ \*/"
    )
    # This is the comment pattern we expect
    expected_comment_pattern = re.compile(
        r"/\* Use: (?P<name>\w+)\t+_EVDEVK\(0x(?P<value>[0-9A-F]{3})\)\t+   (v[2-6]\.[0-9]+(\.[0-9]+)?)? +KEY_\w+ \*/"
    )

    # Some patterns to spot specific errors, just so we can print useful errors
    define = re.compile(r"^#define .*")
    name_pattern = re.compile(r"#define (XF86XK_[^\s]*)")
    tab_check = re.compile(r"#define \w+(\s+)[^\s]+(\s+)")
    hex_pattern = re.compile(r".*0x([a-f0-9]+).*", re.I)
    comment_format = re.compile(r".*/\* ([^\s]+)?\s+(\w+)")
    kver_format = re.compile(r"v[2-6]\.[0-9]+(\.[0-9]+)?")

    in_evdev_codes_section = False
    had_evdev_codes_section = False
    success = True

    # Names of all XF86XK_ defines seen so far, to detect duplicates
    all_defines = set()

    all_keysym_names = set(all_keysyms(ns.header.parent))

    # Key names known to libevdev, built once instead of once per #define line
    known_keynames = {c.name for c in libevdev.EV_KEY.codes}

    class ParserError(Exception):
        pass

    def error(msg, line):
        raise ParserError(f"{msg} in '{line.strip()}'")

    # Read the header up front so the file handle is closed deterministically
    with open(ns.header) as fd:
        lines = fd.readlines()

    last_keycode = 0
    for line in lines:
        try:
            if not in_evdev_codes_section:
                if start_token.match(line):
                    in_evdev_codes_section = True
                    had_evdev_codes_section = True
                    continue

                if reserved_range.match(line):
                    error("Using reserved range", line)
                match = name_pattern.match(line)
                if match:
                    all_defines.add(match.group(1))
            else:
                # Within the evdev defines section
                if end_token.match(line):
                    in_evdev_codes_section = False
                    continue

                # Comments we only search for a hex pattern and where there is one present
                # we only check for uppercase format, ordering and update our last_keycode.
                if not define.match(line):
                    match = expected_comment_pattern.match(line)
                    if match:
                        hexcode = match.group("value")
                        if hexcode != hexcode.upper():
                            error(f"Hex code 0x{hexcode} must be uppercase", line)
                        if hexcode:
                            keycode = int(hexcode, 16)
                            if keycode < last_keycode:
                                error("Keycode must be ascending", line)
                            if keycode == last_keycode:
                                error("Duplicate keycode", line)
                            last_keycode = keycode

                        name = match.group("name")
                        if name not in all_keysym_names:
                            error(f"Unknown keysym {name}", line)
                    elif hex_pattern.match(line):
                        logger.warning(f"Unexpected hex code in {line}")
                    continue

                # Anything below here is a #define line
                # Let's check for specific errors
                if normal_range.match(line):
                    error("Define must use _EVDEVK", line)

                match = name_pattern.match(line)
                if match:
                    if match.group(1) in all_defines:
                        error("Duplicate define", line)
                    all_defines.add(match.group(1))
                else:
                    error("Typo", line)

                match = hex_pattern.match(line)
                if not match:
                    error("No hex code", line)
                if match.group(1) != match.group(1).upper():
                    error(f"Hex code 0x{match.group(1)} must be uppercase", line)

                tabs = tab_check.match(line)
                if not tabs:  # bug
                    error("Matching error", line)
                if " " in tabs.group(1) or " " in tabs.group(2):
                    error("Use tabs, not spaces", line)

                comment = comment_format.match(line)
                if not comment:
                    error("Invalid comment format", line)
                kver = comment.group(1)
                if kver and not kver_format.match(kver):
                    error("Invalid kernel version format", line)

                keyname = comment.group(2)
                if not keyname.startswith("KEY_") or keyname.upper() != keyname:
                    error("Kernel keycode name invalid", line)

                # This could be an old libevdev
                if keyname not in known_keynames:
                    logger.warning(f"Unknown kernel keycode name {keyname}")

                # Check the full expected format, no better error messages
                # available if this fails
                match = expected_pattern.match(line)
                if not match:
                    error("Failed match", line)

                keycode = int(match.group(1), 16)
                if keycode < last_keycode:
                    error("Keycode must be ascending", line)
                if keycode == last_keycode:
                    error("Duplicate keycode", line)

                # May cause a false positive for old libevdev if KEY_MAX is bumped
                if keycode < 0x0A0 or keycode > libevdev.EV_KEY.KEY_MAX.value:
                    error("Keycode outside range", line)

                last_keycode = keycode
        except ParserError as e:
            # Report and carry on so that all problems are listed in one run
            logger.error(e)
            success = False

    if not had_evdev_codes_section:
        logger.error("Unable to locate EVDEVK section")
        success = False
    elif in_evdev_codes_section:
        logger.error("Unterminated EVDEVK section")
        success = False

    if success:
        logger.info("Verification succeeded")

    return 0 if success else 1
328
329
def add_keysyms(ns):
    """
    Print a new XF86keysym.h file, adding any *missing* keycodes to the existing file.

    ns is the parsed command line; ns.header, ns.kernel_git_tree and
    ns.kernel_versions are used. The header is verified first and the script
    exits via die() if that fails. The new header is written to stdout.

    Returns 0.
    """
    if verify(ns) != 0:
        die("Header file verification failed")

    # If verification succeeds, we can be a bit more lenient here because we already know
    # what the format of the field is. Specifically, we're searching for
    # 3-digit hexcode in brackets and use that as keycode.
    pattern = re.compile(r".*_EVDEVK\((0x[a-fA-F0-9]{3})\).*")
    # Highest real (non-button) key code known to libevdev
    max_code = max(
        c.value
        for c in libevdev.EV_KEY.codes
        if c.is_defined
        and c != libevdev.EV_KEY.KEY_MAX
        and not c.name.startswith("BTN")
    )

    def defined_keycodes(path):
        """
        Returns an iterator to the next #defined (or otherwise mentioned)
        keycode, all other lines (including the returned one) are passed
        through to print.

        A line is only printed after its keycode has been yielded, so the
        caller can emit missing keycodes in front of it. At the end of the
        evdev section max_code + 1 is yielded as a final upper bound.
        """
        with open(path) as fd:
            in_evdev_codes_section = False

            for line in fd:
                if not in_evdev_codes_section:
                    if start_token.match(line):
                        in_evdev_codes_section = True
                elif end_token.match(line):
                    in_evdev_codes_section = False
                    # The caller treats the yielded code as an exclusive upper
                    # bound; yield one past max_code so that max_code itself
                    # is generated if the header does not define it yet.
                    yield max_code + 1
                else:
                    match = pattern.match(line)
                    if match:
                        logger.debug(f"Found keycode in {line.strip()}")
                        yield int(match.group(1), 16)
                # passthrough for every line, in and outside the section
                print(line, end="")

    kernel = Kernel(ns.kernel_git_tree)
    prev_code = 255 - 8  # the last keycode we can map directly in X
    for code in defined_keycodes(ns.header):
        # Fill the gap between the previous and the current defined keycode
        for missing in range(prev_code + 1, code):
            newline = generate_keysym_line(
                missing, kernel, kver_list=ns.kernel_versions
            )
            if newline:
                print(newline)
        prev_code = code

    return 0
389
390
def find_xf86keysym_header():
    """
    Locate XF86keysym.h: look recursively below the current working directory
    first and fall back to $INCLUDESDIR (or /usr/include/) otherwise.

    Exits via die() if more than one candidate is found below the current
    directory or if no header can be found at all. This is a convenience
    function for running the script locally, it should not be relied on in
    the CI.
    """
    candidates = tuple(Path.cwd().glob("**/XF86keysym.h"))
    if len(candidates) > 1:
        die("Multiple XF86keysym.h in CWD, please use --header")

    if candidates:
        header = candidates[0]
    else:
        # Nothing in the tree, try the system-wide header as last resort
        include_root = Path(os.getenv("INCLUDESDIR") or "/usr/include/")
        header = include_root / "X11" / "XF86keysym.h"
        if not header.exists():
            die(f"Unable to find XF86keysym.h in CWD or {include_root}")

    logger.info(f"Using header file {header}")
    return header
410
411
def main():
    """
    Parse the command line and run the selected command.

    Without a command, "verify" is run. The process exits with the return
    value of the command function.
    """
    parser = argparse.ArgumentParser(description="Keysym parser script")
    parser.add_argument("--verbose", "-v", action="count", default=0)
    parser.add_argument(
        "--header",
        type=str,
        default=None,
        help="Path to the XF86keysym.h header file (default: search $CWD)",
    )

    subparsers = parser.add_subparsers(help="command-specific help", dest="command")
    parser_verify = subparsers.add_parser(
        "verify", help="Verify the XF86keysym.h matches requirements (default)"
    )
    parser_verify.set_defaults(func=verify)

    parser_generate = subparsers.add_parser(
        "add-keysyms", help="Add missing keysyms to the existing ones"
    )
    parser_generate.add_argument(
        "--kernel-git-tree",
        type=str,
        default=None,
        required=True,
        help="Path to a kernel git repo, required to find git tags",
    )
    parser_generate.add_argument(
        "--kernel-versions",
        type=str,
        default=None,
        required=False,
        help="Comma-separated list of kernel versions to limit ourselves to (e.g. 'v5.10,v5.9'). Supports fnmatch.",
    )
    parser_generate.set_defaults(func=add_keysyms)
    ns = parser.parse_args()

    # More than two -v still means DEBUG; the fallback must be a real logging
    # level, not the bare verbosity count.
    verbosity_levels = {2: logging.DEBUG, 1: logging.INFO, 0: logging.WARNING}
    logger.setLevel(verbosity_levels.get(ns.verbose, logging.DEBUG))

    if not ns.header:
        ns.header = find_xf86keysym_header()
    else:
        ns.header = Path(ns.header)

    if ns.command is None:
        # No subparser ran, so its set_defaults(func=...) did not apply
        print("No command specified, defaulting to verify")
        ns.func = verify

    sys.exit(ns.func(ns))


if __name__ == "__main__":
    main()
466