1#!/usr/bin/env python3
2#
3# SPDX-License-Identifier: MIT
4#
# This script checks XF86keysym.h for the reserved evdev keysym range and/or
# appends new keysyms to that range. An up-to-date libevdev must be
# available to guarantee the correct keycode ranges and names.
8#
9# Run with --help for usage information.
10#
11#
12# File is formatted with Python Black
13
14import argparse
15import logging
16import os
17import sys
18import re
19import subprocess
20from pathlib import Path
21
# libevdev is the only third-party dependency; nothing in this script works
# without it, so bail out early with a readable message instead of a
# traceback. Both lines of the diagnostic go to stderr so that stdout stays
# reserved for the generated header.
# NOTE(review): exit code 77 is presumably chosen because test harnesses
# (automake, meson) treat it as "skipped" - confirm against the CI setup.
try:
    import libevdev
except ModuleNotFoundError as e:
    print(f"Error: {e}", file=sys.stderr)
    print(
        "One or more python modules are missing. Please install those "
        "modules and re-run this tool.",
        file=sys.stderr,
    )
    sys.exit(77)
31
32
# Configure the root handler once; main() later adjusts the level of the
# "ksgen" logger according to the --verbose count.
logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
logger = logging.getLogger("ksgen")

# Markers delimiting the evdev keysym section of the header file. Both are
# used with re.match(), i.e. they must match at the very start of a line.
# Note that end_token requires the newline directly after "_EVDEVK".
start_token = re.compile(r"#define _EVDEVK.*")
end_token = re.compile(r"#undef _EVDEVK\n")
38
39
def die(msg):
    """
    Log msg at CRITICAL level and terminate the process with exit status 1.
    """
    logger.critical(msg)
    raise SystemExit(1)
43
44
def all_keysyms(directory):
    """
    Extract the key names for all keysyms we have in our repo and return
    them as list.

    directory is a pathlib.Path. Every file directly inside it whose name
    matches "*keysym*.h" is scanned for lines of the form
    "#define NAME 0x<hex>"; NAME is collected for each such line.

    Files are visited in sorted order so the returned list is deterministic.
    """
    pattern = re.compile(r"^#define\s+(?P<name>\w+)\s+(0x[0-9A-Fa-f]+)")
    keysym_names = []
    for path in sorted(directory.glob("*keysym*.h")):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding. Undecodable bytes are replaced rather
        # than aborting the scan, we only care about the #define lines.
        with open(path, encoding="utf-8", errors="replace") as fd:
            for line in fd:
                match = pattern.match(line)
                if match:
                    keysym_names.append(match.group("name"))
    return keysym_names
59
60
class Kernel:
    """
    Wrapper around the kernel git tree to simplify searching for when a
    particular keycode was introduced.
    """

    def __init__(self, repo):
        """
        repo is the path of a kernel git checkout.

        Exits via die() if git fails or the checkout is not on the master
        branch. On success self.versions holds the release tags (vX.Y or
        vX.Y.Z, major version 2-6) in the order git reports them for
        --sort=version:refname.
        """
        self.repo = repo

        exitcode, stdout, stderr = self.git_command("git branch --show-current")
        if exitcode != 0:
            die(f"{stderr}")
        branch = stdout.strip()
        if branch != "master":
            die(f"Kernel repo must be on the master branch (current: {branch})")

        exitcode, stdout, stderr = self.git_command("git tag --sort=version:refname")
        if exitcode != 0:
            die(f"{stderr}")
        # Only plain release tags, no -rc or other suffixed tags
        release_tag = re.compile(r"^v[2-6]\.[0-9]+(\.[0-9]+)?$")
        self.versions = [tag for tag in stdout.split("\n") if release_tag.match(tag)]
        logger.debug(f"Kernel versions: {', '.join(self.versions)}")

    def git_command(self, cmd):
        """
        Takes a single-string git command and runs it in the repo.

        The command is split on single spaces, so no argument may contain a
        space.

        Returns the tuple (exitcode, stdout, stderr)
        """
        # logger.debug(f"git command: {cmd}")
        try:
            result = subprocess.run(
                cmd.split(" "), cwd=self.repo, capture_output=True, encoding="utf8"
            )
            if result.returncode == 128:
                die(f"{result.stderr}")

            return result.returncode, result.stdout, result.stderr
        except FileNotFoundError:
            die(f"{self.repo} is not a git repository")

    @staticmethod
    def _first_true(iterable, func):
        """
        Return the first element in the sequence iterable for which func
        returns True, or None if func is False for every element.

        This is a binary search, so func is expected to be False for a
        (possibly empty) prefix of iterable and True for the remainder.
        """
        if not iterable:
            return None

        # bias to speed things up: most keycodes will be in the first
        # kernel version
        if func(iterable[0]):
            return iterable[0]

        # Index 0 was tested above, no need to look at it again
        lo, hi = 1, len(iterable)
        while lo < hi:
            mid = (lo + hi) // 2
            if func(iterable[mid]):
                hi = mid
            else:
                lo = mid + 1

        # lo == len(iterable) means func never returned True
        return iterable[lo] if lo < len(iterable) else None

    def introduced_in_version(self, string):
        """
        Search this repo for the first version with string in the headers.

        Returns the kernel version number (e.g. "v5.10") or None. None is
        returned both when string is already present in the oldest known
        version and when it is not present in any known version.
        """

        # The fastest approach is to git grep every version for the string
        # and return the first. Using git log -G and then git tag --contains
        # is an order of magnitude slower.
        def found_in_version(v):
            cmd = f"git grep -E \\<{string}\\> {v} -- include/"
            exitcode, _, _ = self.git_command(cmd)
            return exitcode == 0

        version = self._first_true(self.versions, found_in_version)
        logger.debug(f"Bisected {string} to {version}")
        # 2.6.11 doesn't count, that's the start of git
        if version is None or version == self.versions[0]:
            return None
        return version
139
140
# Layout constants for the "#define XF86XK_..." lines produced by
# generate_keysym_line().
TARGET_KEYSYM_COLUMN = 54
"""Column in the file we want the keysym codes to end"""
# Width left for the keysym name plus its padding once the "#define " prefix
# and the "_EVDEVK(0xNNN)" value are subtracted from the target column.
KEYSYM_NAME_MAX_LENGTH = TARGET_KEYSYM_COLUMN - len("#define ") - len("_EVDEVK(0xNNN)")
# Minimum width of the (left-aligned) kernel version in the trailing comment.
KERNEL_VERSION_PADDING = 7
145
146
def generate_keysym_line(code, kernel, kver_list=None):
    """
    Generate the line to append to the keysym file.

    This format is semi-ABI, scripts rely on the format of this line (e.g. in
    xkeyboard-config).

    code is the numeric EV_KEY code. kernel must provide
    introduced_in_version(name), returning a version string or None.
    kver_list is an optional comma-separated string of kernel versions
    (fnmatch patterns); anything falsy disables the filter.

    Returns the formatted "#define" line, or None if the code has no name in
    libevdev, is a BTN_ code, or was introduced in a kernel version not
    matched by kver_list.

    Raises ValueError if the keysym name leaves no room for at least one
    space before the _EVDEVK() value.
    """
    evcode = libevdev.evbit(libevdev.EV_KEY.value, code)
    if not evcode.is_defined:  # codes without a #define in the kernel
        return None
    if evcode.name.startswith("BTN_"):
        return None

    # KEY_FOO_BAR -> XF86XK_FooBar
    name = "".join([s.capitalize() for s in evcode.name[4:].lower().split("_")])
    keysym = f"XF86XK_{name}"
    spaces = KEYSYM_NAME_MAX_LENGTH - len(keysym)
    # A negative value must be rejected too: ' ' * spaces would be empty and
    # the name would run straight into _EVDEVK().
    if spaces < 1:
        raise ValueError(f"Insufficient padding for keysym “{keysym}”.")
    kver = kernel.introduced_in_version(evcode.name) or " "
    if kver_list:
        from fnmatch import fnmatch

        allowed_kvers = [v.strip() for v in kver_list.split(",")]
        if not any(fnmatch(kver, allowed) for allowed in allowed_kvers):
            return None

    return f"#define {keysym}{' ' * spaces}_EVDEVK(0x{code:03x})  /* {kver: <{KERNEL_VERSION_PADDING}s} {evcode.name} */"
177
178
def verify(ns):
    """
    Verify that the XF86keysym.h file follows the requirements. Since we expect
    the header file to be parsed by outside scripts, the requirements for the format
    are quite strict, including things like correct-case hex codes.

    ns is the parsed argument namespace; only ns.header (a pathlib.Path to
    the header file) is used. Each offending line is logged as an error and
    checking continues with the next line.

    Returns 0 if the file passed all checks, 1 otherwise.
    """

    # No other keysym must use this range
    reserved_range = re.compile(r"#define.*0x10081.*")
    normal_range = re.compile(r"#define.*0x1008.*")

    # This is the full pattern we expect.
    expected_pattern = re.compile(
        r"#define XF86XK_\w+ +_EVDEVK\(0x([0-9A-Fa-f]{3})\) +/\* (v[2-6]\.[0-9]+(\.[0-9]+)?)? +KEY_\w+ \*/"
    )
    # This is the comment pattern we expect
    expected_comment_pattern = re.compile(
        r"/\* Use: (?P<name>\w+) +_EVDEVK\(0x(?P<value>[0-9A-Fa-f]{3})\) +   (v[2-6]\.[0-9]+(\.[0-9]+)?)? +KEY_\w+ \*/"
    )

    # Some patterns to spot specific errors, just so we can print useful errors
    define = re.compile(r"^#define .*")
    name_pattern = re.compile(r"#define (XF86XK_[^\s]*)")
    space_check = re.compile(r"#define \w+(\s+)[^\s]+(\s+)")
    hex_pattern = re.compile(r".*0x([a-f0-9]+).*", re.I)
    comment_format = re.compile(r".*/\* ([^\s]+)?\s+(\w+)")
    kver_format = re.compile(r"v[2-6]\.[0-9]+(\.[0-9]+)?")

    in_evdev_codes_section = False
    had_evdev_codes_section = False
    success = True

    all_defines = []

    # Both collections are only used for membership tests, build them once
    all_keysym_names = set(all_keysyms(ns.header.parent))
    kernel_key_names = {c.name for c in libevdev.EV_KEY.codes}

    class ParserError(Exception):
        pass

    def error(msg, line):
        raise ParserError(f"{msg} in '{line.strip()}'")

    # Read everything up front so the file handle is closed again before
    # the (long) checking loop starts.
    with open(ns.header) as fd:
        lines = fd.readlines()

    last_keycode = 0
    for line in lines:
        try:
            if not in_evdev_codes_section:
                if re.match(start_token, line):
                    in_evdev_codes_section = True
                    had_evdev_codes_section = True
                    continue

                if re.match(reserved_range, line):
                    error("Using reserved range", line)
                match = re.match(name_pattern, line)
                if match:
                    all_defines.append(match.group(1))
            else:
                # Within the evdev defines section
                if re.match(end_token, line):
                    in_evdev_codes_section = False
                    continue

                # Comments we only search for a hex pattern and where there is one present
                # we only check for lower case format, ordering and update our last_keycode.
                if not re.match(define, line):
                    match = re.match(expected_comment_pattern, line)
                    if match:
                        hexcode = match.group("value")
                        if hexcode != hexcode.lower():
                            error(f"Hex code 0x{hexcode} must be lower case", line)
                        if hexcode:
                            keycode = int(hexcode, 16)
                            if keycode < last_keycode:
                                error("Keycode must be ascending", line)
                            if keycode == last_keycode:
                                error("Duplicate keycode", line)
                            last_keycode = keycode

                        name = match.group("name")
                        if name not in all_keysym_names:
                            error(f"Unknown keysym {name}", line)
                    elif re.match(hex_pattern, line):
                        logger.warning(f"Unexpected hex code in {line}")
                    continue

                # Anything below here is a #define line
                # Let's check for specific errors
                if re.match(normal_range, line):
                    error("Define must use _EVDEVK", line)

                match = re.match(name_pattern, line)
                if match:
                    if match.group(1) in all_defines:
                        error("Duplicate define", line)
                    all_defines.append(match.group(1))
                else:
                    error("Typo", line)

                match = re.match(hex_pattern, line)
                if not match:
                    error("No hex code", line)
                if match.group(1) != match.group(1).lower():
                    error(f"Hex code 0x{match.group(1)} must be lowercase", line)

                spaces = re.match(space_check, line)
                if not spaces:  # bug
                    error("Matching error", line)
                if "\t" in spaces.group(1) or "\t" in spaces.group(2):
                    error("Use spaces, not tabs", line)

                comment = re.match(comment_format, line)
                if not comment:
                    error("Invalid comment format", line)
                kver = comment.group(1)
                if kver and not re.match(kver_format, kver):
                    error("Invalid kernel version format", line)

                keyname = comment.group(2)
                if not keyname.startswith("KEY_") or keyname.upper() != keyname:
                    error("Kernel keycode name invalid", line)

                # This could be an old libevdev
                if keyname not in kernel_key_names:
                    logger.warning(f"Unknown kernel keycode name {keyname}")

                # Check the full expected format, no better error messages
                # available if this fails
                match = re.match(expected_pattern, line)
                if not match:
                    error("Failed match", line)

                keycode = int(match.group(1), 16)
                if keycode < last_keycode:
                    error("Keycode must be ascending", line)
                if keycode == last_keycode:
                    error("Duplicate keycode", line)

                # May cause a false positive for old libevdev if KEY_MAX is bumped
                if keycode < 0x0A0 or keycode > libevdev.EV_KEY.KEY_MAX.value:
                    error("Keycode outside range", line)

                last_keycode = keycode
        except ParserError as e:
            # Report the problem and carry on with the next line so that one
            # run lists every offending line.
            logger.error(e)
            success = False

    if not had_evdev_codes_section:
        logger.error("Unable to locate EVDEVK section")
        success = False
    elif in_evdev_codes_section:
        logger.error("Unterminated EVDEVK section")
        success = False

    if success:
        logger.info("Verification succeeded")

    return 0 if success else 1
336
337
def add_keysyms(ns):
    """
    Print a new XF86keysym.h file, adding any *missing* keycodes to the existing file.

    ns is the parsed argument namespace; ns.header, ns.kernel_git_tree and
    ns.kernel_versions are used. The existing header is verified first and
    the process exits via die() if that fails.

    The complete new header is written to stdout. Returns 0.
    """
    if verify(ns) != 0:
        die("Header file verification failed")

    # If verification succeeds, we can be a bit more lenient here because we already know
    # what the format of the field is. Specifically, we're searching for
    # 3-digit hexcode in brackets and use that as keycode.
    pattern = re.compile(r".*_EVDEVK\((0x[0-9A-Fa-f]{3})\).*")
    # The highest real keycode: KEY_MAX itself and the BTN_ codes don't count
    max_code = max(
        c.value
        for c in libevdev.EV_KEY.codes
        if c.is_defined
        and c != libevdev.EV_KEY.KEY_MAX
        and not c.name.startswith("BTN")
    )

    def defined_keycodes(path):
        """
        Returns an iterator to the next #defined (or otherwise mentioned)
        keycode, all other lines (including the returned one) are passed
        through to stdout.

        Each line is printed only after its keycode was yielded, so anything
        the caller prints in between ends up in front of that line. At the
        end of the evdev section max_code + 1 is yielded as sentinel so the
        caller can fill in everything up to and including max_code.
        """
        with open(path) as fd:
            in_evdev_codes_section = False

            for line in fd:
                if not in_evdev_codes_section:
                    if re.match(start_token, line):
                        in_evdev_codes_section = True
                    # passthrough for all other lines
                    print(line, end="")
                else:
                    if re.match(end_token, line):
                        in_evdev_codes_section = False
                        # The caller treats the yielded code as exclusive
                        # upper bound, so go one past the last real keycode.
                        yield max_code + 1
                    else:
                        match = re.match(pattern, line)
                        if match:
                            logger.debug(f"Found keycode in {line.strip()}")
                            yield int(match.group(1), 16)
                    print(line, end="")

    kernel = Kernel(ns.kernel_git_tree)
    prev_code = 255 - 8  # the last keycode we can map directly in X
    for code in defined_keycodes(ns.header):
        # Everything between the previous and the current keycode is missing
        # from the header, code itself is already there.
        for missing in range(prev_code + 1, code):
            newline = generate_keysym_line(
                missing, kernel, kver_list=ns.kernel_versions
            )
            if newline:
                print(newline)
        prev_code = code

    return 0
397
398
def find_xf86keysym_header():
    """
    Locate XF86keysym.h: look anywhere below the current working directory
    first and fall back to $INCLUDESDIR/X11 (or /usr/include/X11) if nothing
    is found there. Exits via die() if the file cannot be found or if the
    search below the working directory is ambiguous.

    This is a convenience for running the script locally, the CI should not
    rely on it.
    """
    candidates = list(Path.cwd().glob("**/XF86keysym.h"))
    if len(candidates) > 1:
        die("Multiple XF86keysym.h in CWD, please use --header")

    if candidates:
        path = candidates[0]
    else:
        # Nothing in the tree, try the system-wide copy
        fallbackdir = Path(os.getenv("INCLUDESDIR") or "/usr/include/")
        path = fallbackdir / "X11" / "XF86keysym.h"
        if not path.exists():
            die(f"Unable to find XF86keysym.h in CWD or {fallbackdir}")

    logger.info(f"Using header file {path}")
    return path
418
419
def main():
    """
    Parse the command line, run the selected subcommand ("verify" if none
    was given) and exit with the subcommand's return value.
    """
    parser = argparse.ArgumentParser(description="Keysym parser script")
    parser.add_argument("--verbose", "-v", action="count", default=0)
    parser.add_argument(
        "--header",
        type=str,
        default=None,
        help="Path to the XF86keysym.h header file (default: search $CWD)",
    )

    subparsers = parser.add_subparsers(help="command-specific help", dest="command")
    parser_verify = subparsers.add_parser(
        "verify", help="Verify the XF86keysym.h matches requirements (default)"
    )
    parser_verify.set_defaults(func=verify)

    parser_generate = subparsers.add_parser(
        "add-keysyms", help="Add missing keysyms to the existing ones"
    )
    parser_generate.add_argument(
        "--kernel-git-tree",
        type=str,
        default=None,
        required=True,
        help="Path to a kernel git repo, required to find git tags",
    )
    parser_generate.add_argument(
        "--kernel-versions",
        type=str,
        # The value is a comma-separated string; None means "no filter"
        default=None,
        required=False,
        help="Comma-separated list of kernel versions to limit ourselves to (e.g. 'v5.10,v5.9'). Supports fnmatch.",
    )
    parser_generate.set_defaults(func=add_keysyms)
    ns = parser.parse_args()

    # More than two -v are treated like two
    logger.setLevel(
        {2: logging.DEBUG, 1: logging.INFO, 0: logging.WARNING}.get(
            ns.verbose, logging.DEBUG
        )
    )

    if not ns.header:
        ns.header = find_xf86keysym_header()
    else:
        ns.header = Path(ns.header)

    # Without a subcommand argparse never sets ns.func
    if ns.command is None:
        print("No command specified, defaulting to verify")
        ns.func = verify

    sys.exit(ns.func(ns))
470
471
# Only run the command line interface when executed as a script, not when
# the file is imported as a module.
if __name__ == "__main__":
    main()
474