#!/usr/bin/perl -w
# $XTermId: convmap.pl,v 1.15 2018/09/09 17:22:24 tom Exp $
#
# Generate keysym2ucs.c file
#
# Reads three inputs and prints the C source of keysym2ucs.c on stdout:
#   UnicodeData.txt                 - character names, used in table comments
#   keysym.map                      - keysym -> UCS mapping (current directory)
#   /usr/include/X11/keysymdef.h    - symbolic keysym names
#
# See also:
# http://mail.nl.linux.org/linux-utf8/2001-04/msg00248.html
#
# $XFree86: xc/programs/xterm/unicode/convmap.pl,v 1.5 2000/01/24 22:22:05 dawes Exp $

use strict;
use warnings;

our %name;                     # UCS code point  -> Unicode character name
our %keysym_to_ucs;            # keysym value    -> UCS code point
our %keysym_to_keysymname;     # keysym value    -> symbolic name (or map comment)

sub utf8 ($);

# utf8($c)
#
# Encode the integer code point $c as a UTF-8 octet sequence and return it
# as a string with one character per octet.  The original (pre-RFC 3629)
# scheme with sequences of up to six octets is implemented, covering values
# below 0x80000000; anything larger is replaced by U+FFFD.
sub utf8 ($) {
    my $c = shift(@_);

    if ($c < 0x80) {
        return sprintf("%c", $c);
    } elsif ($c < 0x800) {
        return sprintf("%c%c",
                       0xc0 |  ($c >>  6),
                       0x80 |  ($c        & 0x3f));
    } elsif ($c < 0x10000) {
        return sprintf("%c%c%c",
                       0xe0 |  ($c >> 12),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 |  ($c        & 0x3f));
    } elsif ($c < 0x200000) {
        return sprintf("%c%c%c%c",
                       0xf0 |  ($c >> 18),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 |  ($c        & 0x3f));
    } elsif ($c < 0x4000000) {
        return sprintf("%c%c%c%c%c",
                       0xf8 |  ($c >> 24),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 |  ($c        & 0x3f));
    } elsif ($c < 0x80000000) {
        # The lead octet of a six-octet sequence is 1111110x, i.e. 0xfc;
        # 0xfe is never a valid UTF-8 octet.
        return sprintf("%c%c%c%c%c%c",
                       0xfc |  ($c >> 30),
                       0x80 | (($c >> 24) & 0x3f),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 |  ($c        & 0x3f));
    } else {
        # Not representable: substitute U+FFFD.
        return utf8(0xfffd);
    }
}

my $unicodedata = "UnicodeData.txt";

# read list of all Unicode names
open(my $udata_fh, '<', $unicodedata)
    or die ("Can't open Unicode database '$unicodedata':\n$!\n\n" .
            "Please make sure that you have downloaded the file\n" .
            "ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt\n");
while (<$udata_fh>) {
    # A record is a hexadecimal code point followed by 14 more
    # semicolon-separated fields; only the second field (the name) is kept.
    if (/^([0-9A-F]{4,6});([^;]*)(?:;[^;]*){13}$/) {
        $name{hex($1)} = $2;
    } else {
        die("Syntax error in line '$_' in file '$unicodedata'");
    }
}
close($udata_fh);

# read mapping (from http://wsinwp07.win.tue.nl:1234/unicode/keysym.map)
open(my $map_fh, '<', 'keysym.map') or die ("Can't open map file:\n$!\n");
while (<$map_fh>) {
    if (/^0x([0-9a-f]{4})\s+U([0-9a-f]{4})\s*(\#.*)?$/) {
        my $keysym  = hex($1);
        my $ucs     = hex($2);
        # The trailing comment is optional; fall back to an empty name so
        # that nothing undefined is stored or printed later.
        my $comment = defined($3) ? $3 : '';
        $comment =~ s/^#\s*//;
        $keysym_to_ucs{$keysym}        = $ucs;
        $keysym_to_keysymname{$keysym} = $comment;
    } elsif (/^\s*\#/ || /^\s*$/) {
        # comment-only or blank line: nothing to do
    } else {
        die("Syntax error in 'list' in line\n$_\n");
    }
}
close($map_fh);

# read entries in keysymdef.h
# Names found here replace the comment-derived names taken from keysym.map.
open(my $defs_fh, '<', '/usr/include/X11/keysymdef.h')
    or die ("Can't open keysymdef.h:\n$!\n");
while (<$defs_fh>) {
    if (/^\#define\s+XK_([A-Za-z_0-9]+)\s+0x([0-9a-fA-F]+)\s*(\/.*)?$/) {
        my $keysymname = $1;
        my $keysym     = hex($2);
        # Definitions marked deprecated must not override a current name.
        next if m{/\* deprecated \*/};
        $keysym_to_keysymname{$keysym} = $keysymname;
    }
}
close($defs_fh);

print <<EOT;
/* \$XTermId\$
 * This module converts keysym values into the corresponding ISO 10646
 * (UCS, Unicode) values.
 *
 * The array keysymtab[] contains pairs of X11 keysym values for graphical
 * characters and the corresponding Unicode value. The function
 * keysym2ucs() maps a keysym onto a Unicode value using a binary search,
 * therefore keysymtab[] must remain SORTED by keysym value.
 *
 * The keysym -> UTF-8 conversion will hopefully one day be provided
 * by Xlib via XmbLookupString() and should ideally not have to be
 * done in X applications. But we are not there yet.
 *
 * We allow to represent any UCS character in the range U-00000000 to
 * U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
 * This admittedly does not cover the entire 31-bit space of UCS, but
 * it does cover all of the characters up to U-10FFFF, which can be
 * represented by UTF-16, and more, and it is very unlikely that higher
 * UCS codes will ever be assigned by ISO. So to get Unicode character
 * U+ABCD you can directly use keysym 0x0100abcd.
 *
 * NOTE: The comments in the table below contain the actual character
 * encoded in UTF-8, so for viewing and editing best use an editor in
 * UTF-8 mode.
 *
 * Author: Markus G. Kuhn <mkuhn\@acm.org>, University of Cambridge, April 2001
 *
 * Special thanks to Richard Verhoeven <river\@win.tue.nl> for preparing
 * an initial draft of the mapping table.
 *
 * This software is in the public domain. Share and enjoy!
 *
 * AUTOMATICALLY GENERATED FILE, DO NOT EDIT !!! (unicode/convmap.pl)
 */

#ifndef KEYSYM2UCS_INCLUDED

#include "keysym2ucs.h"
#define VISIBLE /* */

#else

#define VISIBLE static

#endif

static struct codepair {
    unsigned short keysym;
    unsigned short ucs;
} keysymtab[] = {
EOT

# Emit the table in ascending keysym order, as the binary search in the
# generated keysym2ucs() requires.
for my $keysym (sort {$a <=> $b} keys(%keysym_to_keysymname)) {
    my $ucs = $keysym_to_ucs{$keysym};
    # Values below 0x100 and from 0xf000 upwards are left out of the table.
    next if $keysym >= 0xf000 || $keysym < 0x100;
    if ($ucs) {
        printf(" { 0x%04x, 0x%04x }, /*%28s %s %s */\n",
               $keysym, $ucs, $keysym_to_keysymname{$keysym}, utf8($ucs),
               defined($name{$ucs}) ? $name{$ucs} : "???" );
    } else {
        # Named keysym with no mapping: emit it as a comment only.
        printf("/* 0x%04x %39s ? ??? */\n",
               $keysym, $keysym_to_keysymname{$keysym});
    }
}

print <<EOT;
};

VISIBLE
long keysym2ucs(KeySym keysym)
{
    int min = 0;
    int max = sizeof(keysymtab) / sizeof(struct codepair) - 1;

    /* first check for Latin-1 characters (1:1 mapping) */
    if ((keysym >= 0x0020 && keysym <= 0x007e) ||
        (keysym >= 0x00a0 && keysym <= 0x00ff))
        return (long) keysym;

    /* also check for directly encoded 24-bit UCS characters */
    if ((keysym & 0xff000000) == 0x01000000)
        return (long) (keysym & 0x00ffffff);

    /* binary search in table */
    while (max >= min) {
        int mid = (min + max) / 2;
        if (keysymtab[mid].keysym < keysym)
            min = mid + 1;
        else if (keysymtab[mid].keysym > keysym)
            max = mid - 1;
        else {
            /* found it */
            return keysymtab[mid].ucs;
        }
    }

    /* no matching Unicode value found */
    return -1;
}
EOT