#!/usr/bin/env python3

# Generate Unicode case-folding table for Ada.

# Copyright (C) 2022-2024 Free Software Foundation, Inc.

# This file is part of GDB.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# This generates the ada-casefold.h header.
# Usage:
#   python ada-unicode.py

import gdbcopyright


class Range:
    """A span of values plus the deltas to their upper/lower-case forms.

    A freshly created range covers only RANGE_START; callers widen it
    by assigning to the range_end property.
    """

    def __init__(self, range_start: int, upper_delta: int, lower_delta: int):
        # The end starts out equal to the start.
        self._range_start = self._range_end = range_start
        self._upper_delta, self._lower_delta = upper_delta, lower_delta

    @property
    def range_start(self) -> int:
        """The start of the range (read-only)."""
        return self._range_start

    @property
    def range_end(self) -> int:
        """The end of the range; this is the only writable property."""
        return self._range_end

    @range_end.setter
    def range_end(self, val: int):
        self._range_end = val

    @property
    def upper_delta(self) -> int:
        """Delta between RANGE_START and the upper-case variant of that
        character (read-only)."""
        return self._upper_delta

    @property
    def lower_delta(self) -> int:
        """Delta between RANGE_START and the lower-case variant of that
        character (read-only)."""
        return self._lower_delta


# The range currently being processed.  None means we are outside of
# any range.
current_range: Range | None = None

# All the ranges found and completed so far.
all_ranges: list[Range] = []


def finish_range():
    """Close the range in progress, if any, and record it in all_ranges.

    Afterwards current_range is None, meaning we are outside of a range.
    Calling this when no range is open is a no-op.
    """
    global current_range

    if current_range is not None:
        all_ranges.append(current_range)
        current_range = None


def process_codepoint(val: int):
    """Fold the code point VAL into the range currently being built.

    VAL either extends current_range, starts a new range (closing the
    previous one if its deltas differ), or -- when it has no usable
    single-character case mapping -- just closes the current range.
    Code points are expected to be passed in increasing order with no
    gaps, since a range is only ever extended by setting its end to VAL.
    """
    global current_range

    c = chr(val)
    low = c.lower()
    up = c.upper()
    # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017).  Our simple scheme
    # can't handle this, so we skip it -- along with any other
    # character whose upper- or lower-case form is not exactly one
    # character, and any character that has no case at all.  Also,
    # because our approach just represents runs of characters with
    # identical folding deltas, this change must terminate the
    # current run.
    if (c == low and c == up) or len(low) != 1 or len(up) != 1:
        finish_range()
        return
    updelta = ord(up) - val
    lowdelta = ord(low) - val

    # A change in either delta ends the current run; VAL then starts a
    # new one below.
    if current_range is not None and (
        updelta != current_range.upper_delta or lowdelta != current_range.lower_delta
    ):
        finish_range()

    if current_range is None:
        current_range = Range(val, updelta, lowdelta)

    current_range.range_end = val


# Scan every code point, U+0000 through U+10FFFF inclusive.  The upper
# bound of range() is exclusive, hence 0x110000.
for c in range(0x110000):
    process_codepoint(c)
# Flush a range still open at the last code point so it cannot be
# silently dropped from the table.
finish_range()

with open("ada-casefold.h", "w") as f:
    print(
        gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
        file=f,
    )
    print("", file=f)
    # One initializer per range: {start, end, upper delta, lower delta}.
    for r in all_ranges:
        print(
            f" {{{r.range_start}, {r.range_end}, {r.upper_delta}, {r.lower_delta}}},",
            file=f,
        )