Home | History | Annotate | Line # | Download | only in misc
      1 ############################################################################
      2 # Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      3 #
      4 # SPDX-License-Identifier: MPL-2.0
      5 #
      6 # This Source Code Form is subject to the terms of the Mozilla Public
      7 # License, v. 2.0. If a copy of the MPL was not distributed with this
      8 # file, you can obtain one at https://mozilla.org/MPL/2.0/.
      9 #
     10 # See the COPYRIGHT file distributed with this work for additional
     11 # information regarding copyright ownership.
     12 ############################################################################
     13 
     14 """
     15 Read ISC config grammar description produced by "cfg_test --grammar",
     16 transform it into JSON, and print it to stdout.
     17 
     18 Beware: This parser is pretty dumb and heavily depends on cfg_test output
     19 format. See parse_mapbody() for more details.
     20 
     21 Maps are recursively parsed into sub-dicts, all other elements (lists etc.)
     22 are left intact and returned as one string.
     23 
An output example from the named.conf grammar, showing three variants, follows.
     25 Keys "_flags" and "_id" are present only if non-empty. Key "_grammar" denotes
     26 end node, key "_mapbody" denotes a nested map.
     27 
     28 {
     29     "acl": {
     30         "_flags": [
     31             "may occur multiple times"
     32         ],
     33         "_grammar": "<string> { <address_match_element>; ... }"
     34     },
     35     "http": {
     36         "_flags": [
     37             "may occur multiple times"
     38         ],
     39         "_id": "<string>",
     40         "_mapbody": {
     41             "endpoints": {
     42                 "_grammar": "{ <quoted_string>; ... }"
     43             },
     44             "streams-per-connection": {
     45                 "_grammar": "<integer>"
     46             }
     47         }
     48     },
     49     "options": {
     50         "_mapbody": {
     51             "rate-limit": {
     52                 "_mapbody": {
     53                     "all-per-second": {
     54                         "_grammar": "<integer>"
     55                     }
     56                 }
     57             }
     58         }
     59     }
     60 }
     61 """
     62 import fileinput
     63 import json
     64 import re
     65 
# Flag strings that parse_flags() searches for (as substrings) in the comment
# part of a line. They must match the exact wording emitted by cfg_test.
FLAGS = [
    "may occur multiple times",
    "obsolete",
    "deprecated",
    "experimental",
    "test only",
]

# Shape of an acceptable map key: ASCII letters, digits, and hyphens only.
# parse_mapbody() applies it with fullmatch() as a sanity check on keywords.
KEY_REGEX = re.compile("[a-zA-Z0-9-]+")
     75 
     76 
def split_comments(line):
    """Split line into its non-comment part and its comment part.

    The comment starts at the earliest occurrence of #, //, or /* and runs
    to the end of the line. Nothing is stripped: concatenating the two
    returned strings gives back the original line.

    Supports only #, //, and /* comments which end at the end of line.
    It does NOT handle:
    - quoted strings
    - /* comments which do not end at line boundary
    - multiple /* comments on a single line

    Returns:
    - tuple (noncomment, comment); comment is "" if the line has no comment

    Raises:
    - AssertionError if the line contains a double quote
    - NotImplementedError if the comment is opened by /* and is either not
      closed at the end of the line or followed by another /*
    """
    assert '"' not in line, 'lines with " are not supported'

    # str.find() returns -1 for a delimiter which is not present at all
    starts = [line.find(delimiter) for delimiter in ("#", "//", "/*")]
    data_end_idx = min((idx for idx in starts if idx != -1), default=len(line))

    # Sanity checks make sense only if /* is what really opens the comment.
    # A /* which appears later, inside a # or // comment, is plain comment
    # text and must not trigger them.
    if line.startswith("/*", data_end_idx):
        if not line.rstrip().endswith("*/"):
            raise NotImplementedError(
                "unsupported /* comment, does not end at the end of line", line
            )
        if "/*" in line[data_end_idx + 2 :]:
            raise NotImplementedError(
                "unsupported line with multiple /* comments", line
            )

    noncomment = line[:data_end_idx]
    comment = line[data_end_idx:]
    return noncomment, comment
    106 
    107 
    108 def parse_line(filein):
    109     """Consume single line from input, return non-comment and comment."""
    110     for line in filein:
    111         line, comment = split_comments(line)
    112         line = line.strip()
    113         comment = comment.strip()
    114         if not line:
    115             continue
    116         yield line, comment
    117 
    118 
def parse_flags(comments):
    """Return known flags present in comments, in the order given by FLAGS.

    A flag is reported when its exact text (as used by cfg_test) occurs
    anywhere in comments; it is a plain substring test.
    """
    return [known for known in FLAGS if known in comments]
    126 
    127 
def parse_mapbody(filein):
    """Parse body of a "map" in ISC config format.

    Input lines can be only:
    - whitespace & comments only -> ignore
    - <keyword> <anything>; -> store <anything> as "_grammar" for this keyword
    - <keyword> <anything> { -> parse sub-map and store (optional) <anything> as "_id",
                                producing nested dict under "_mapbody"
    Also store known strings found at the end of line in "_flags".
    For a nested map the flags are taken from the comment on its closing };
    line; flags on the opening line are not used.

    Returns:
    - tuple (map dict, list of flags from the }; line) when }; line is reached
    - map dict when we run out of lines without the closing };

    Raises:
    - NotImplementedError if a keyword does not match KEY_REGEX
    - ValueError if a nested map is not terminated by };
    - AssertionError if input ends and no statement was parsed, or if
      a statement ends with neither ; nor {
    """
    thismap = {}
    for line, comment in parse_line(filein):
        flags = parse_flags(comment)
        if line == "};":  # end of a nested map
            return thismap, flags

        # first word - a map key name
        # beware: some statements do not have parameters, e.g. "null;"
        key = line.split()[0].rstrip(";")
        # map key sanity check
        if not KEY_REGEX.fullmatch(key):
            raise NotImplementedError("suspicious keyword detected", line)

        # omit keyword from the grammar
        grammar = line[len(key) :].strip()
        # also skip final ; or {
        grammar = grammar[:-1].strip()

        entry = thismap[key] = {}
        if line.endswith("{"):
            # nested map, recurse, but keep "extra identifiers" if any
            nested = parse_mapbody(filein)
            # A bare dict means the recursion ran out of input before it saw
            # the closing };. Check the type explicitly: unpacking the dict
            # would silently succeed whenever it has exactly two keys.
            if not isinstance(nested, tuple):
                raise ValueError("unfinished nested map, missing }; detected")
            subkeys, flags = nested
            if flags:
                entry["_flags"] = flags
            if grammar:
                # for lines which look like "view <name> {" store "<name>"
                entry["_id"] = grammar
            entry["_mapbody"] = subkeys
        else:
            assert line.endswith(";")
            if flags:
                entry["_flags"] = flags
            entry["_grammar"] = grammar

    # Ran out of lines: can happen only on the end of the top-level map-body!
    # Only the dict is returned here, so a caller which recursed into a nested
    # map can tell that the closing }; was missing.
    assert len(thismap)
    return thismap
    184 
    185 
def main():
    """Convert grammar from files named on the command line (or stdin) to JSON.

    The input is parsed as one top-level map body and the result is printed
    to stdout as JSON indented by four spaces.
    """
    with fileinput.input() as source:
        parsed = parse_mapbody(source)
    rendered = json.dumps(parsed, indent=4)
    print(rendered)
    191 
    192 
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
    195