#!/usr/bin/perl -w
# convmap.pl revision f2e35a3a
# $XTermId: convmap.pl,v 1.15 2018/09/09 17:22:24 tom Exp $
#
# Generate keysym2ucs.c file
#
# See also:
# http://mail.nl.linux.org/linux-utf8/2001-04/msg00248.html
#
# $XFree86: xc/programs/xterm/unicode/convmap.pl,v 1.5 2000/01/24 22:22:05 dawes Exp $
#
# Inputs (all at fixed paths):
#   UnicodeData.txt               - Unicode character names (current directory)
#   keysym.map                    - keysym -> UCS mapping   (current directory)
#   /usr/include/X11/keysymdef.h  - keysym names
# Output:
#   C source for the keysym table and keysym2ucs(), written to standard output.

use strict;
use warnings;

my %name;                    # UCS code point -> Unicode character name
my %keysym_to_ucs;           # keysym value   -> UCS code point
my %keysym_to_keysymname;    # keysym value   -> keysym name (or map comment)

# utf8($c)
#
# Return the UTF-8 encoding of the code point $c as a string of bytes.
# Values below 0x80000000 are encoded with the 1- to 6-byte scheme; anything
# larger is replaced by the encoding of U+FFFD.
sub utf8 {
    my ($c) = @_;

    if ($c < 0x80) {
        return sprintf("%c", $c);
    } elsif ($c < 0x800) {
        return sprintf("%c%c",
                       0xc0 |  ($c >>  6),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x10000) {
        return sprintf("%c%c%c",
                       0xe0 |  ($c >> 12),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x200000) {
        return sprintf("%c%c%c%c",
                       0xf0 |  ($c >> 18),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x4000000) {
        return sprintf("%c%c%c%c%c",
                       0xf8 |  ($c >> 24),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x80000000) {
        # The lead byte of a 6-byte sequence is 1111110x, i.e. 0xfc
        # (0xfe would set a bit that is never part of a lead byte).
        return sprintf("%c%c%c%c%c%c",
                       0xfc |  ($c >> 30),
                       0x80 | (($c >> 24) & 0x3f),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } else {
        return utf8(0xfffd);
    }
}

my $unicodedata = "UnicodeData.txt";

# read list of all Unicode names
open(my $udata_fh, '<', $unicodedata)
    or die("Can't open Unicode database '$unicodedata':\n$!\n\n" .
           "Please make sure that you have downloaded the file\n" .
           "ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt\n");
while (my $line = <$udata_fh>) {
    # A record is 15 semicolon-separated fields: the code point in hex,
    # the character name, and 13 further fields that are not used here.
    if ($line =~ /^([0-9A-F]{4,6});([^;]*)(?:;[^;]*){13}$/) {
        $name{hex($1)} = $2;
    } else {
        die("Syntax error in line '$line' in file '$unicodedata'");
    }
}
close($udata_fh);

# read mapping (from http://wsinwp07.win.tue.nl:1234/unicode/keysym.map)
open(my $map_fh, '<', "keysym.map") or die("Can't open map file:\n$!\n");
while (my $line = <$map_fh>) {
    if ($line =~ /^0x([0-9a-f]{4})\s+U([0-9a-f]{4})\s*(\#.*)?$/) {
        my ($keysym, $ucs, $comment) = (hex($1), hex($2), $3);
        # The trailing comment is optional; use an empty name when absent
        # instead of operating on an undefined value.
        $comment = "" unless defined($comment);
        $comment =~ s/^#\s*//;
        $keysym_to_ucs{$keysym} = $ucs;
        $keysym_to_keysymname{$keysym} = $comment;
    } elsif ($line =~ /^\s*\#/ || $line =~ /^\s*$/) {
        # comment-only or blank line: nothing to record
    } else {
        die("Syntax error in 'list' in line\n$line\n");
    }
}
close($map_fh);

# read entries in keysymdef.h; a name found here replaces the one taken
# from the comment in keysym.map
open(my $def_fh, '<', "/usr/include/X11/keysymdef.h")
    or die("Can't open keysymdef.h:\n$!\n");
while (my $line = <$def_fh>) {
    next unless $line =~ /^\#define\s+XK_([A-Za-z_0-9]+)\s+0x([0-9a-fA-F]+)\s*(\/.*)?$/;
    my ($keysymname, $keysym) = ($1, hex($2));
    next if $line =~ m{/\* deprecated \*/};
    $keysym_to_keysymname{$keysym} = $keysymname;
}
close($def_fh);

# file header and start of the table
print <<EOT;
/* \$XTermId\$
 * This module converts keysym values into the corresponding ISO 10646
 * (UCS, Unicode) values.
 *
 * The array keysymtab[] contains pairs of X11 keysym values for graphical
 * characters and the corresponding Unicode value. The function
 * keysym2ucs() maps a keysym onto a Unicode value using a binary search,
 * therefore keysymtab[] must remain SORTED by keysym value.
 *
 * The keysym -> UTF-8 conversion will hopefully one day be provided
 * by Xlib via XmbLookupString() and should ideally not have to be
 * done in X applications. But we are not there yet.
 *
 * We allow to represent any UCS character in the range U-00000000 to
 * U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
 * This admittedly does not cover the entire 31-bit space of UCS, but
 * it does cover all of the characters up to U-10FFFF, which can be
 * represented by UTF-16, and more, and it is very unlikely that higher
 * UCS codes will ever be assigned by ISO. So to get Unicode character
 * U+ABCD you can directly use keysym 0x0100abcd.
 *
 * NOTE: The comments in the table below contain the actual character
 * encoded in UTF-8, so for viewing and editing best use an editor in
 * UTF-8 mode.
 *
 * Author: Markus G. Kuhn <mkuhn\@acm.org>, University of Cambridge, April 2001
 *
 * Special thanks to Richard Verhoeven <river\@win.tue.nl> for preparing
 * an initial draft of the mapping table.
 *
 * This software is in the public domain. Share and enjoy!
 *
 * AUTOMATICALLY GENERATED FILE, DO NOT EDIT !!! (unicode/convmap.pl)
 */

#ifndef KEYSYM2UCS_INCLUDED

#include "keysym2ucs.h"
#define VISIBLE /* */

#else

#define VISIBLE static

#endif

static struct codepair {
    unsigned short keysym;
    unsigned short ucs;
} keysymtab[] = {
EOT

# One line per known keysym, in ascending numeric order (the generated
# binary search depends on that order).  Keysyms without a mapping are
# emitted as a C comment only.  Both formats right-align the keysym name
# so that it ends in the same column.
for my $keysym (sort { $a <=> $b } keys(%keysym_to_keysymname)) {
    # Keysyms below 0x100 and from 0xf000 upwards are left out of the table.
    next if $keysym >= 0xf000 || $keysym < 0x100;
    my $ucs = $keysym_to_ucs{$keysym};
    if ($ucs) {
        printf("  { 0x%04x, 0x%04x }, /*%28s %s %s */\n",
               $keysym, $ucs, $keysym_to_keysymname{$keysym}, utf8($ucs),
               defined($name{$ucs}) ? $name{$ucs} : "???");
    } else {
        printf("/*  0x%04x   %39s ? ??? */\n",
               $keysym, $keysym_to_keysymname{$keysym});
    }
}

# end of the table and the lookup function
print <<EOT;
};

VISIBLE
long keysym2ucs(KeySym keysym)
{
    int min = 0;
    int max = sizeof(keysymtab) / sizeof(struct codepair) - 1;

    /* first check for Latin-1 characters (1:1 mapping) */
    if ((keysym >= 0x0020 && keysym <= 0x007e) ||
        (keysym >= 0x00a0 && keysym <= 0x00ff))
        return (long) keysym;

    /* also check for directly encoded 24-bit UCS characters */
    if ((keysym & 0xff000000) == 0x01000000)
        return (long) (keysym & 0x00ffffff);

    /* binary search in table */
    while (max >= min) {
        int mid = (min + max) / 2;
        if (keysymtab[mid].keysym < keysym)
            min = mid + 1;
        else if (keysymtab[mid].keysym > keysym)
            max = mid - 1;
        else {
            /* found it */
            return keysymtab[mid].ucs;
        }
    }

    /* no matching Unicode value found */
    return -1;
}
EOT