Home | History | Annotate | Line # | Download | only in gdb
      1 #!/usr/bin/env python3
      2 
      3 # Generate Unicode case-folding table for Ada.
      4 
      5 # Copyright (C) 2022-2024 Free Software Foundation, Inc.
      6 
      7 # This file is part of GDB.
      8 
      9 # This program is free software; you can redistribute it and/or modify
     10 # it under the terms of the GNU General Public License as published by
     11 # the Free Software Foundation; either version 3 of the License, or
     12 # (at your option) any later version.
     13 
     14 # This program is distributed in the hope that it will be useful,
     15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17 # GNU General Public License for more details.
     18 
     19 # You should have received a copy of the GNU General Public License
     20 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
     21 
     22 # This generates the ada-casefold.h header.
     23 # Usage:
     24 #   python ada-unicode.py
     25 
     26 import gdbcopyright
     27 
     28 
# A span of code points together with the case deltas recorded for it.
# A newly created Range covers only RANGE_START; it is widened by
# assigning to its range_end property.
class Range:
    def __init__(self, range_start: int, upper_delta: int, lower_delta: int):
        self._upper_delta = upper_delta
        self._lower_delta = lower_delta
        self._range_start = range_start
        # Until range_end is assigned, the range ends where it starts.
        self._range_end = range_start

    # First code point of the range.  Read-only.
    @property
    def range_start(self) -> int:
        return self._range_start

    # Offset from RANGE_START to its upper-case variant.  Read-only.
    @property
    def upper_delta(self) -> int:
        return self._upper_delta

    # Offset from RANGE_START to its lower-case variant.  Read-only.
    @property
    def lower_delta(self) -> int:
        return self._lower_delta

    # Last code point of the range.  This is the only property that
    # can be assigned after construction.
    @property
    def range_end(self) -> int:
        return self._range_end

    @range_end.setter
    def range_end(self, new_end: int):
        self._range_end = new_end
     61 
     62 
# The range currently being built.  If None, then we're outside of a
# range.  It is created by process_codepoint and handed over to
# ALL_RANGES by finish_range.
current_range: Range | None = None

# All the ranges found and completed so far, in the order in which
# finish_range appended them.
all_ranges: list[Range] = []
     68 
     69 
# Close the range being built, if there is one: append it to
# ALL_RANGES and reset CURRENT_RANGE to None.  Does nothing when no
# range is open.
def finish_range():
    global current_range

    if current_range is None:
        return
    all_ranges.append(current_range)
    current_range = None
     76 
     77 
# Account for the code point VAL: either extend the range being
# built, start a new one, or -- when VAL cannot be represented --
# close whatever range is open.
def process_codepoint(val: int):
    global current_range

    ch = chr(val)
    lowered = ch.lower()
    uppered = ch.upper()

    # A character that is its own lower- and upper-case form has no
    # folding to record.
    uncased = ch == lowered and ch == uppered

    # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017).  Our simple scheme
    # can't handle a mapping that is not exactly one character long,
    # so such characters are skipped.  Also, because our approach
    # just represents runs of characters with identical folding
    # deltas, a skipped character must terminate the current run.
    if uncased or len(lowered) != 1 or len(uppered) != 1:
        finish_range()
        return

    up_delta = ord(uppered) - val
    low_delta = ord(lowered) - val

    # A change in either delta ends the run in progress.
    if current_range is not None:
        open_deltas = (current_range.upper_delta, current_range.lower_delta)
        if open_deltas != (up_delta, low_delta):
            finish_range()

    # finish_range may just have reset CURRENT_RANGE, so test it again.
    if current_range is None:
        current_range = Range(val, up_delta, low_delta)

    current_range.range_end = val
    105 
    106 
# Scan the whole Unicode codespace, U+0000 through U+10FFFF inclusive.
# range() excludes its stop value, so the stop must be 0x110000 for
# the last code point to be visited.
for c in range(0x110000):
    process_codepoint(c)

# process_codepoint only closes a range when a later code point ends
# it.  Flush explicitly so that a range still open after the last
# code point is not lost from the output.
finish_range()

# Write the generated header: the copyright banner from gdbcopyright,
# a blank line, then one brace-enclosed initializer per range holding
# its start, end, upper-case delta and lower-case delta.
with open("ada-casefold.h", "w") as f:
    print(
        gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
        file=f,
    )
    print("", file=f)
    for r in all_ranges:
        print(
            f"   {{{r.range_start}, {r.range_end}, {r.upper_delta}, {r.lower_delta}}},",
            file=f,
        )
    121