#!/usr/bin/perl -w
# $XTermId: convmap.pl,v 1.15 2018/09/09 17:22:24 tom Exp $
#
# Generate keysym2ucs.c file
#
# See also:
# http://mail.nl.linux.org/linux-utf8/2001-04/msg00248.html
#
# $XFree86: xc/programs/xterm/unicode/convmap.pl,v 1.5 2000/01/24 22:22:05 dawes Exp $
10d522f475Smrg
use strict;

# Package globals shared by the reader sections and the table emitter:
#   $keysym                - loop variable of the table emitter
#   %name                  - UCS code point -> Unicode character name
#   %keysym_to_ucs         - keysym value   -> UCS code point
#   %keysym_to_keysymname  - keysym value   -> keysym name
our ($keysym, %name, %keysym_to_ucs, %keysym_to_keysymname);
17d522f475Smrg
sub utf8 ($);

# Encode one UCS code point as a UTF-8 byte sequence.
#
# Uses the original 31-bit UTF-8 scheme, so sequences may be one to six
# bytes long:
#
#   0x00000000 .. 0x0000007f   0xxxxxxx
#   0x00000080 .. 0x000007ff   110xxxxx 10xxxxxx
#   0x00000800 .. 0x0000ffff   1110xxxx 10xxxxxx 10xxxxxx
#   0x00010000 .. 0x001fffff   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
#   0x00200000 .. 0x03ffffff   111110xx 10xxxxxx (x4)
#   0x04000000 .. 0x7fffffff   1111110x 10xxxxxx (x5)
#
# Parameter: the code point as a number.
# Returns:   a string in which every character holds one byte of the
#            encoding.  Values of 0x80000000 and above cannot be encoded
#            and yield the encoding of U+FFFD instead.
sub utf8 ($) {
    my $c = shift(@_);

    if ($c < 0x80) {
        return sprintf("%c", $c);
    } elsif ($c < 0x800) {
        return sprintf("%c%c", 0xc0 | ($c >> 6), 0x80 | ($c & 0x3f));
    } elsif ($c < 0x10000) {
        return sprintf("%c%c%c",
                       0xe0 |  ($c >> 12),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x200000) {
        return sprintf("%c%c%c%c",
                       0xf0 |  ($c >> 18),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x4000000) {
        return sprintf("%c%c%c%c%c",
                       0xf8 |  ($c >> 24),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x80000000) {
        # The six-byte lead byte is 1111110x, i.e. 0xfc plus bit 30.
        # (0xfe would produce a byte that never occurs in UTF-8.)
        return sprintf("%c%c%c%c%c%c",
                       0xfc |  ($c >> 30),
                       0x80 | (($c >> 24) & 0x3f),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } else {
        # Not representable: substitute U+FFFD REPLACEMENT CHARACTER.
        return utf8(0xfffd);
    }
}
58d522f475Smrg
my $unicodedata = "UnicodeData.txt";

# Read the list of all Unicode names into %name, keyed by numeric code point.
# Every line must consist of a 4-6 digit upper-case hexadecimal code point
# followed by fourteen further semicolon-separated fields; the first of
# those fields is the character name.  Any other line aborts the run.
open(my $udata, '<', $unicodedata)
    or die ("Can't open Unicode database '$unicodedata':\n$!\n\n" .
            "Please make sure that you have downloaded the file\n" .
            "ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt\n");
while (my $line = <$udata>) {
    if ($line =~ /^([0-9A-F]{4,6});([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*)$/) {
        $name{hex($1)} = $2;
    } else {
        die("Syntax error in line '$line' in file '$unicodedata'");
    }
}
close($udata);
75d522f475Smrg
# read mapping (from http://wsinwp07.win.tue.nl:1234/unicode/keysym.map)
#
# Each data line has the form "0xKKKK UXXXX [# comment]" with lower-case
# hexadecimal digits.  The keysym -> UCS pair goes into %keysym_to_ucs and
# the comment text (without its leading '#') into %keysym_to_keysymname.
# Comment-only and blank lines are skipped; anything else is fatal.
open(my $map, '<', "keysym.map") or die ("Can't open map file:\n$!\n");
while (my $line = <$map>) {
    if ($line =~ /^0x([0-9a-f]{4})\s+U([0-9a-f]{4})\s*(\#.*)?$/) {
        my $keysym = hex($1);
        my $ucs = hex($2);
        # The trailing comment is optional; use an empty name rather than
        # undef so later string operations do not warn under -w.
        my $comment = defined($3) ? $3 : '';
        $comment =~ s/^#\s*//;
        $keysym_to_ucs{$keysym} = $ucs;
        $keysym_to_keysymname{$keysym} = $comment;
    } elsif ($line =~ /^\s*\#/ || $line =~ /^\s*$/) {
        # comment or blank line: nothing to record
    } else {
        die("Syntax error in 'list' in line\n$line\n");
    }
}
close($map);
92d522f475Smrg
# read entries in keysymdef.h
#
# Every "#define XK_<name> 0x<hex>" line records <name> as the keysym name
# for that value in %keysym_to_keysymname, replacing any name stored there
# before.  Lines carrying a "/* deprecated */" remark are ignored, as is
# every line that is not such a #define.
open(my $defs, '<', "/usr/include/X11/keysymdef.h")
    or die ("Can't open keysymdef.h:\n$!\n");
while (my $line = <$defs>) {
    next unless $line =~ /^\#define\s+XK_([A-Za-z_0-9]+)\s+0x([0-9a-fA-F]+)\s*(\/.*)?$/;
    # Copy the captures before running another pattern match.
    my $keysymname = $1;
    my $keysym = hex($2);
    next if $line =~ /\/\* deprecated \*\//;
    $keysym_to_keysymname{$keysym} = $keysymname;
}
close($defs);
104d522f475Smrg
# Emit the fixed leading part of keysym2ucs.c: the explanatory comment, the
# VISIBLE linkage macro and the opening of the keysymtab[] initializer.
# The '$' and '@' characters stay escaped so that neither Perl nor a
# revision-control keyword expander touches them inside this script.
print <<"END_OF_HEADER";
/* \$XTermId\$
 * This module converts keysym values into the corresponding ISO 10646
 * (UCS, Unicode) values.
 *
 * The array keysymtab[] contains pairs of X11 keysym values for graphical
 * characters and the corresponding Unicode value. The function
 * keysym2ucs() maps a keysym onto a Unicode value using a binary search,
 * therefore keysymtab[] must remain SORTED by keysym value.
 *
 * The keysym -> UTF-8 conversion will hopefully one day be provided
 * by Xlib via XmbLookupString() and should ideally not have to be
 * done in X applications. But we are not there yet.
 *
 * We allow to represent any UCS character in the range U-00000000 to
 * U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
 * This admittedly does not cover the entire 31-bit space of UCS, but
 * it does cover all of the characters up to U-10FFFF, which can be
 * represented by UTF-16, and more, and it is very unlikely that higher
 * UCS codes will ever be assigned by ISO. So to get Unicode character
 * U+ABCD you can directly use keysym 0x0100abcd.
 *
 * NOTE: The comments in the table below contain the actual character
 * encoded in UTF-8, so for viewing and editing best use an editor in
 * UTF-8 mode.
 *
 * Author: Markus G. Kuhn <mkuhn\@acm.org>, University of Cambridge, April 2001
 *
 * Special thanks to Richard Verhoeven <river\@win.tue.nl> for preparing
 * an initial draft of the mapping table.
 *
 * This software is in the public domain. Share and enjoy!
 *
 * AUTOMATICALLY GENERATED FILE, DO NOT EDIT !!! (unicode/convmap.pl)
 */

#ifndef KEYSYM2UCS_INCLUDED

#include "keysym2ucs.h"
#define VISIBLE /* */

#else

#define VISIBLE static

#endif

static struct codepair {
  unsigned short keysym;
  unsigned short ucs;
} keysymtab[] = {
END_OF_HEADER
157d522f475Smrg
# Emit the table body in ascending keysym order.  Only keysyms in the range
# 0x0100 .. 0xefff are considered.  A keysym with a non-zero UCS mapping
# becomes an initializer row annotated with its name, the character itself
# in UTF-8 and the Unicode name ("???" when %name has none); any other
# keysym is written as a comment-only line.
foreach my $code (sort { $a <=> $b } keys %keysym_to_keysymname) {
    next if $code < 0x100 || $code >= 0xf000;

    my $label = $keysym_to_keysymname{$code};
    my $ucs = $keysym_to_ucs{$code};

    unless ($ucs) {
        printf("/*  0x%04x   %39s ? ??? */\n", $code, $label);
        next;
    }

    my $uname = defined($name{$ucs}) ? $name{$ucs} : "???";
    printf("  { 0x%04x, 0x%04x }, /*%28s %s %s */\n",
           $code, $ucs, $label, utf8($ucs), $uname);
}
170d522f475Smrg
# Emit the fixed trailing part of keysym2ucs.c: the end of the keysymtab[]
# initializer and the keysym2ucs() lookup function that searches it.
print <<"END_OF_FOOTER";
};

VISIBLE
long keysym2ucs(KeySym keysym)
{
    int min = 0;
    int max = sizeof(keysymtab) / sizeof(struct codepair) - 1;

    /* first check for Latin-1 characters (1:1 mapping) */
    if ((keysym >= 0x0020 && keysym <= 0x007e) ||
        (keysym >= 0x00a0 && keysym <= 0x00ff))
        return (long) keysym;

    /* also check for directly encoded 24-bit UCS characters */
    if ((keysym & 0xff000000) == 0x01000000)
        return (long) (keysym & 0x00ffffff);

    /* binary search in table */
    while (max >= min) {
        int mid = (min + max) / 2;
        if (keysymtab[mid].keysym < keysym)
            min = mid + 1;
        else if (keysymtab[mid].keysym > keysym)
            max = mid - 1;
        else {
            /* found it */
            return keysymtab[mid].ucs;
        }
    }

    /* no matching Unicode value found */
    return -1;
}
END_OF_FOOTER
206