convmap.pl revision f2e35a3a
#!/usr/bin/perl -w
# $XTermId: convmap.pl,v 1.15 2018/09/09 17:22:24 tom Exp $
#
# Generate keysym2ucs.c file
#
# See also:
# http://mail.nl.linux.org/linux-utf8/2001-04/msg00248.html
#
# $XFree86: xc/programs/xterm/unicode/convmap.pl,v 1.5 2000/01/24 22:22:05 dawes Exp $
#
# Inputs (all read relative to the current directory unless absolute):
#   UnicodeData.txt               - code point -> character name
#   keysym.map                    - keysym -> UCS mapping, optional "# name"
#   /usr/include/X11/keysymdef.h  - XK_* definitions (keysym -> symbolic name)
# Output: the C source of keysym2ucs.c, written to standard output.

use strict;

our $keysym;                    # loop variable of the table emitter below
our %name;                      # UCS code point -> Unicode character name
our %keysym_to_ucs;             # keysym value   -> UCS code point
our %keysym_to_keysymname;      # keysym value   -> symbolic keysym name

sub utf8 ($);

# Encode a single UCS code point as a UTF-8 byte sequence.
#
# Parameter: $c - numeric code point.
# Returns:   a string of 1..6 characters, each holding one byte of the
#            encoding (the original 31-bit UTF-8 scheme).  Values of
#            0x80000000 and above are replaced by the encoding of U+FFFD.
sub utf8 ($) {
    my $c = shift(@_);

    if ($c < 0x80) {
        return sprintf("%c", $c);
    } elsif ($c < 0x800) {
        return sprintf("%c%c", 0xc0 | ($c >> 6), 0x80 | ($c & 0x3f));
    } elsif ($c < 0x10000) {
        return sprintf("%c%c%c",
                       0xe0 |  ($c >> 12),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x200000) {
        return sprintf("%c%c%c%c",
                       0xf0 |  ($c >> 18),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x4000000) {
        return sprintf("%c%c%c%c%c",
                       0xf8 |  ($c >> 24),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x80000000) {
        # Lead byte of a six-byte sequence is 1111110x, i.e. 0xfc.
        # (0xfe, used previously, is never a valid UTF-8 byte.)
        return sprintf("%c%c%c%c%c%c",
                       0xfc |  ($c >> 30),
                       0x80 | (($c >> 24) & 0x3f),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } else {
        return utf8(0xfffd);
    }
}

my $unicodedata = "UnicodeData.txt";

# read list of all Unicode names
open(my $udata, '<', $unicodedata)
    or die ("Can't open Unicode database '$unicodedata':\n$!\n\n" .
            "Please make sure that you have downloaded the file\n" .
            "ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt\n");
while (my $line = <$udata>) {
    # Each record has 15 ';'-separated fields; only the first two
    # (hexadecimal code point and character name) are used here.
    if ($line =~ /^([0-9A-F]{4,6});([^;]*)(?:;[^;]*){13}$/) {
        $name{hex($1)} = $2;
    } else {
        die("Syntax error in line '$line' in file '$unicodedata'");
    }
}
close($udata);

# read mapping (from http://wsinwp07.win.tue.nl:1234/unicode/keysym.map)
open(my $map, '<', "keysym.map") or die ("Can't open map file:\n$!\n");
while (my $line = <$map>) {
    if ($line =~ /^0x([0-9a-f]{4})\s+U([0-9a-f]{4})\s*(\#.*)?$/) {
        my $sym = hex($1);
        my $ucs = hex($2);
        # The trailing "# name" part is optional; fall back to an empty
        # name so that later formatting never sees an undefined value.
        my $comment = defined($3) ? $3 : "";
        $comment =~ s/^#\s*//;
        $keysym_to_ucs{$sym} = $ucs;
        $keysym_to_keysymname{$sym} = $comment;
    } elsif ($line =~ /^\s*\#/ || $line =~ /^\s*$/) {
        # comment-only or blank line: nothing to record
    } else {
        die("Syntax error in 'list' in line\n$line\n");
    }
}
close($map);

# read entries in keysymdef.h
open(my $defs, '<', "/usr/include/X11/keysymdef.h")
    or die ("Can't open keysymdef.h:\n$!\n");
while (my $line = <$defs>) {
    if ($line =~ /^\#define\s+XK_([A-Za-z_0-9]+)\s+0x([0-9a-fA-F]+)\s*(\/.*)?$/) {
        # Save the captures before running another match.
        my ($keysymname, $value) = ($1, hex($2));
        next if $line =~ /\/\* deprecated \*\//;
        # Names from keysymdef.h override the names taken from keysym.map.
        $keysym_to_keysymname{$value} = $keysymname;
    }
}
close($defs);

# emit the fixed head of keysym2ucs.c
print <<EOT;
/* \$XTermId\$
 * This module converts keysym values into the corresponding ISO 10646
 * (UCS, Unicode) values.
 *
 * The array keysymtab[] contains pairs of X11 keysym values for graphical
 * characters and the corresponding Unicode value. The function
 * keysym2ucs() maps a keysym onto a Unicode value using a binary search,
 * therefore keysymtab[] must remain SORTED by keysym value.
 *
 * The keysym -> UTF-8 conversion will hopefully one day be provided
 * by Xlib via XmbLookupString() and should ideally not have to be
 * done in X applications. But we are not there yet.
 *
 * We allow to represent any UCS character in the range U-00000000 to
 * U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
 * This admittedly does not cover the entire 31-bit space of UCS, but
 * it does cover all of the characters up to U-10FFFF, which can be
 * represented by UTF-16, and more, and it is very unlikely that higher
 * UCS codes will ever be assigned by ISO. So to get Unicode character
 * U+ABCD you can directly use keysym 0x0100abcd.
 *
 * NOTE: The comments in the table below contain the actual character
 * encoded in UTF-8, so for viewing and editing best use an editor in
 * UTF-8 mode.
 *
 * Author: Markus G. Kuhn <mkuhn\@acm.org>, University of Cambridge, April 2001
 *
 * Special thanks to Richard Verhoeven <river\@win.tue.nl> for preparing
 * an initial draft of the mapping table.
 *
 * This software is in the public domain. Share and enjoy!
 *
 * AUTOMATICALLY GENERATED FILE, DO NOT EDIT !!! (unicode/convmap.pl)
 */

#ifndef KEYSYM2UCS_INCLUDED

#include "keysym2ucs.h"
#define VISIBLE /* */

#else

#define VISIBLE static

#endif

static struct codepair {
    unsigned short keysym;
    unsigned short ucs;
} keysymtab[] = {
EOT

# Emit one row per known keysym in ascending numeric order (the generated
# binary search depends on that order).  Keysyms below 0x100 or from
# 0xf000 upwards are skipped.  A keysym without a non-zero UCS value is
# written as a C comment only.
for $keysym (sort {$a <=> $b} keys(%keysym_to_keysymname)) {
    my $ucs = $keysym_to_ucs{$keysym};
    next if $keysym >= 0xf000 || $keysym < 0x100;
    if ($ucs) {
        printf(" { 0x%04x, 0x%04x }, /*%28s %s %s */\n",
               $keysym, $ucs, $keysym_to_keysymname{$keysym}, utf8($ucs),
               defined($name{$ucs}) ? $name{$ucs} : "???" );
    } else {
        printf("/* 0x%04x %39s ? ??? */\n",
               $keysym, $keysym_to_keysymname{$keysym});
    }
}

# emit the table terminator and the lookup function
print <<EOT;
};

VISIBLE
long keysym2ucs(KeySym keysym)
{
    int min = 0;
    int max = sizeof(keysymtab) / sizeof(struct codepair) - 1;

    /* first check for Latin-1 characters (1:1 mapping) */
    if ((keysym >= 0x0020 && keysym <= 0x007e) ||
        (keysym >= 0x00a0 && keysym <= 0x00ff))
        return (long) keysym;

    /* also check for directly encoded 24-bit UCS characters */
    if ((keysym & 0xff000000) == 0x01000000)
        return (long) (keysym & 0x00ffffff);

    /* binary search in table */
    while (max >= min) {
        int mid = (min + max) / 2;
        if (keysymtab[mid].keysym < keysym)
            min = mid + 1;
        else if (keysymtab[mid].keysym > keysym)
            max = mid - 1;
        else {
            /* found it */
            return keysymtab[mid].ucs;
        }
    }

    /* no matching Unicode value found */
    return -1;
}
EOT