charsets.c revision 0bd37d32
1/* $XTermId: charsets.c,v 1.42 2011/09/11 14:40:17 tom Exp $ */
2
3/*
4 * Copyright 1998-2009,2011 by Thomas E. Dickey
5 *
6 *                         All Rights Reserved
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sublicense, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 * Except as contained in this notice, the name(s) of the above copyright
28 * holders shall not be used in advertising or otherwise to promote the
29 * sale, use or other dealings in this Software without prior written
30 * authorization.
31 *
32 */
33
34#include <xterm.h>
35#include <data.h>
36#include <fontutils.h>
37
38#include <X11/keysym.h>
39
/*
 * This module performs translation as needed to support the DEC VT220 national
 * replacement character sets.  We assume that xterm's font is based on the ISO
 * 8859-1 (Latin 1) character set, which is almost the same as the DEC
 * multinational character set.  Glyph positions 0-31 have to be the DEC
 * graphic characters, though.
 *
 * References:
 *	"VT220 Programmer Pocket Guide" EK-VT220-HR-002 (2nd ed., 1984), which
 *		contains character charts for the national character sets.
 *	"VT330/VT340 Programmer Reference Manual Volume 1: Text Programming"
 *		EK-VT3XX-TP-001 (1st ed, 1987), which contains a table (2-1)
 *		listing the glyphs which are mapped from the multinational
 *		character set to the national character set.
 *
 * The latter reference, though easier to read, has a few errors and omissions.
 */
57
/*
 * Translate an input keysym to the corresponding NRC keysym.
 *
 * code    - the keysym to translate.  Only values in the range 128..255 are
 *           examined; any other value is returned unchanged.
 * charset - the character identifying the selected set, e.g., 'A' (United
 *           Kingdom), 'K' (German), '=' (Swiss).
 *
 * Returns a 7-bit code for any input in the range 128..255:  a keysym which
 * the selected set replaces yields the position that it occupies in that set,
 * and every other keysym just has its high bit stripped.
 */
unsigned
xtermCharSetIn(unsigned code, int charset)
{
#define MAP(to, from) case from: code = to; break

    /* 7-bit codes and keysyms outside Latin 1 are not subject to NRC */
    if (code < 128 || code >= 256)
	return code;

    switch (charset) {
    case 'A':			/* United Kingdom set (or Latin 1)      */
	/*
	 * The pound sign is the only replacement; it lands on 0x23, which is
	 * also what stripping the high bit of XK_sterling produces.
	 */
	if (code == XK_sterling)
	    code = 0x23;
	code &= 0x7f;
	break;

#if OPT_XMC_GLITCH
    case '?':
#endif
    case '1':			/* Alternate Character ROM standard characters */
    case '2':			/* Alternate Character ROM special graphics */
    case 'B':			/* ASCII set                            */
	break;

    case '0':			/* special graphics (line drawing)      */
	break;

    case '4':			/* Dutch */
	/*
	 * XK_bar and XK_f are 7-bit keysyms, so those two cases cannot match
	 * once the range test above has passed; they are kept so that the
	 * table lists every position of the set.
	 */
	switch (code) {
	    MAP(0x23, XK_sterling);
	    MAP(0x40, XK_threequarters);
	    MAP(0x5b, XK_ydiaeresis);
	    MAP(0x5c, XK_onehalf);
	    MAP(0x5d, XK_bar);	/* glyph is not ISO-8859-1 */
	    MAP(0x7b, XK_diaeresis);
	    MAP(0x7c, XK_f);	/* glyph is not ISO-8859-1 */
	    MAP(0x7d, XK_onequarter);
	    MAP(0x7e, XK_acute);
	}
	break;

    case 'C':
    case '5':			/* Finnish */
	switch (code) {
	    MAP(0x5b, XK_Adiaeresis);
	    MAP(0x5c, XK_Odiaeresis);
	    MAP(0x5d, XK_Aring);
	    MAP(0x5e, XK_Udiaeresis);
	    MAP(0x60, XK_eacute);
	    MAP(0x7b, XK_adiaeresis);
	    MAP(0x7c, XK_odiaeresis);
	    MAP(0x7d, XK_aring);
	    MAP(0x7e, XK_udiaeresis);
	}
	break;

    case 'R':			/* French */
	switch (code) {
	    MAP(0x23, XK_sterling);
	    MAP(0x40, XK_agrave);
	    MAP(0x5b, XK_degree);
	    MAP(0x5c, XK_ccedilla);
	    MAP(0x5d, XK_section);
	    MAP(0x7b, XK_eacute);
	    MAP(0x7c, XK_ugrave);
	    MAP(0x7d, XK_egrave);
	    MAP(0x7e, XK_diaeresis);
	}
	break;

    case 'Q':			/* French Canadian */
	switch (code) {
	    MAP(0x40, XK_agrave);
	    MAP(0x5b, XK_acircumflex);
	    MAP(0x5c, XK_ccedilla);
	    MAP(0x5d, XK_ecircumflex);
	    MAP(0x5e, XK_icircumflex);
	    MAP(0x60, XK_ocircumflex);
	    MAP(0x7b, XK_eacute);
	    MAP(0x7c, XK_ugrave);
	    MAP(0x7d, XK_egrave);
	    MAP(0x7e, XK_ucircumflex);
	}
	break;

    case 'K':			/* German */
	switch (code) {
	    MAP(0x40, XK_section);
	    MAP(0x5b, XK_Adiaeresis);
	    MAP(0x5c, XK_Odiaeresis);
	    MAP(0x5d, XK_Udiaeresis);
	    MAP(0x7b, XK_adiaeresis);
	    MAP(0x7c, XK_odiaeresis);
	    MAP(0x7d, XK_udiaeresis);
	    MAP(0x7e, XK_ssharp);
	}
	break;

    case 'Y':			/* Italian */
	switch (code) {
	    MAP(0x23, XK_sterling);
	    MAP(0x40, XK_section);
	    MAP(0x5b, XK_degree);
	    MAP(0x5c, XK_ccedilla);
	    MAP(0x5d, XK_eacute);
	    MAP(0x60, XK_ugrave);
	    MAP(0x7b, XK_agrave);
	    MAP(0x7c, XK_ograve);
	    MAP(0x7d, XK_egrave);
	    MAP(0x7e, XK_igrave);
	}
	break;

    case 'E':
    case '6':			/* Norwegian/Danish */
	switch (code) {
	    MAP(0x40, XK_Adiaeresis);
	    MAP(0x5b, XK_AE);
	    MAP(0x5c, XK_Ooblique);
	    MAP(0x5d, XK_Aring);
	    MAP(0x5e, XK_Udiaeresis);
	    MAP(0x60, XK_adiaeresis);
	    MAP(0x7b, XK_ae);
	    MAP(0x7c, XK_oslash);
	    MAP(0x7d, XK_aring);
	    MAP(0x7e, XK_udiaeresis);
	}
	break;

    case 'Z':			/* Spanish */
	switch (code) {
	    MAP(0x23, XK_sterling);
	    MAP(0x40, XK_section);
	    MAP(0x5b, XK_exclamdown);
	    MAP(0x5c, XK_Ntilde);
	    MAP(0x5d, XK_questiondown);
	    MAP(0x7b, XK_degree);
	    MAP(0x7c, XK_ntilde);
	    MAP(0x7d, XK_ccedilla);
	}
	break;

    case 'H':
    case '7':			/* Swedish */
	switch (code) {
	    MAP(0x40, XK_Eacute);
	    MAP(0x5b, XK_Adiaeresis);
	    MAP(0x5c, XK_Odiaeresis);
	    MAP(0x5d, XK_Aring);
	    MAP(0x5e, XK_Udiaeresis);
	    MAP(0x60, XK_eacute);
	    MAP(0x7b, XK_adiaeresis);
	    MAP(0x7c, XK_odiaeresis);
	    MAP(0x7d, XK_aring);
	    MAP(0x7e, XK_udiaeresis);
	}
	break;

    case '=':			/* Swiss */
	switch (code) {
	    MAP(0x23, XK_ugrave);
	    MAP(0x40, XK_agrave);
	    MAP(0x5b, XK_eacute);
	    MAP(0x5c, XK_ccedilla);
	    MAP(0x5d, XK_ecircumflex);
	    MAP(0x5e, XK_icircumflex);
	    MAP(0x5f, XK_egrave);
	    MAP(0x60, XK_ocircumflex);
	    MAP(0x7b, XK_adiaeresis);
	    MAP(0x7c, XK_odiaeresis);
	    MAP(0x7d, XK_udiaeresis);
	    MAP(0x7e, XK_ucircumflex);
	}
	break;

    default:			/* any character sets we don't recognize */
	break;
    }

    /*
     * NRC in any case is 7-bit:  this strips the high bit from every keysym
     * that the selected set did not replace (mapped codes are already 7-bit).
     */
    return code & 0x7f;
#undef MAP
}
241
242/*
243 * Translate a string to the display form.  This assumes the font has the
244 * DEC graphic characters in cells 0-31, and otherwise is ISO-8859-1.
245 */
246int
247xtermCharSetOut(XtermWidget xw, IChar * buf, IChar * ptr, int leftset)
248{
249    IChar *s;
250    TScreen *screen = TScreenOf(xw);
251    int count = 0;
252    int rightset = screen->gsets[(int) (screen->curgr)];
253
254#define MAP(from, to) case from: chr = to; break
255
256    TRACE(("CHARSET GL=%c(G%d) GR=%c(G%d) SS%d\n\t%s\n",
257	   leftset, screen->curgl,
258	   rightset, screen->curgr,
259	   screen->curss,
260	   visibleIChar(buf, (unsigned) (ptr - buf))));
261
262    for (s = buf; s < ptr; ++s) {
263	int eight = CharOf(E2A(*s));
264	int seven = eight & 0x7f;
265	int cs = (eight >= 128) ? rightset : leftset;
266	int chr = eight;
267
268	count++;
269#if OPT_WIDE_CHARS
270	/*
271	 * This is only partly right - prevent inadvertant remapping of
272	 * the replacement character and other non-8bit codes into bogus
273	 * 8bit codes.
274	 */
275	if (screen->utf8_mode) {
276	    if (*s > 255)
277		continue;
278	}
279#endif
280	switch (cs) {
281	case 'A':		/* United Kingdom set (or Latin 1)      */
282	    if ((xw->flags & NATIONAL)
283		|| (screen->vtXX_level <= 1)) {
284		if (chr == 0x23) {
285#if OPT_WIDE_CHARS
286		    chr = (screen->utf8_mode
287			   ? 0xa3
288			   : XTERM_POUND);
289#else
290		    chr = XTERM_POUND;
291#endif
292		}
293	    } else {
294		chr = (seven | 0x80);
295	    }
296	    break;
297
298#if OPT_XMC_GLITCH
299	case '?':
300#endif
301	case '1':		/* Alternate Character ROM standard characters */
302	case '2':		/* Alternate Character ROM special graphics */
303	case 'B':		/* ASCII set                            */
304	    break;
305
306	case '0':		/* special graphics (line drawing)      */
307	    if (seven > 0x5f && seven <= 0x7e) {
308#if OPT_WIDE_CHARS
309		if (screen->utf8_mode)
310		    chr = (int) dec2ucs((unsigned) (seven - 0x5f));
311		else
312#endif
313		    chr = seven - 0x5f;
314	    } else {
315		chr = seven;
316	    }
317	    break;
318
319	case '4':		/* Dutch */
320	    switch (chr = seven) {
321		MAP(0x23, XK_sterling);
322		MAP(0x40, XK_threequarters);
323		MAP(0x5b, XK_ydiaeresis);
324		MAP(0x5c, XK_onehalf);
325		MAP(0x5d, XK_bar);
326		MAP(0x7b, XK_diaeresis);
327		MAP(0x7c, XK_f);
328		MAP(0x7d, XK_onequarter);
329		MAP(0x7e, XK_acute);
330	    }
331	    break;
332
333	case 'C':
334	case '5':		/* Finnish */
335	    switch (chr = seven) {
336		MAP(0x5b, XK_Adiaeresis);
337		MAP(0x5c, XK_Odiaeresis);
338		MAP(0x5d, XK_Aring);
339		MAP(0x5e, XK_Udiaeresis);
340		MAP(0x60, XK_eacute);
341		MAP(0x7b, XK_adiaeresis);
342		MAP(0x7c, XK_odiaeresis);
343		MAP(0x7d, XK_aring);
344		MAP(0x7e, XK_udiaeresis);
345	    }
346	    break;
347
348	case 'R':		/* French */
349	    switch (chr = seven) {
350		MAP(0x23, XK_sterling);
351		MAP(0x40, XK_agrave);
352		MAP(0x5b, XK_degree);
353		MAP(0x5c, XK_ccedilla);
354		MAP(0x5d, XK_section);
355		MAP(0x7b, XK_eacute);
356		MAP(0x7c, XK_ugrave);
357		MAP(0x7d, XK_egrave);
358		MAP(0x7e, XK_diaeresis);
359	    }
360	    break;
361
362	case 'Q':		/* French Canadian */
363	    switch (chr = seven) {
364		MAP(0x40, XK_agrave);
365		MAP(0x5b, XK_acircumflex);
366		MAP(0x5c, XK_ccedilla);
367		MAP(0x5d, XK_ecircumflex);
368		MAP(0x5e, XK_icircumflex);
369		MAP(0x60, XK_ocircumflex);
370		MAP(0x7b, XK_eacute);
371		MAP(0x7c, XK_ugrave);
372		MAP(0x7d, XK_egrave);
373		MAP(0x7e, XK_ucircumflex);
374	    }
375	    break;
376
377	case 'K':		/* German */
378	    switch (chr = seven) {
379		MAP(0x40, XK_section);
380		MAP(0x5b, XK_Adiaeresis);
381		MAP(0x5c, XK_Odiaeresis);
382		MAP(0x5d, XK_Udiaeresis);
383		MAP(0x7b, XK_adiaeresis);
384		MAP(0x7c, XK_odiaeresis);
385		MAP(0x7d, XK_udiaeresis);
386		MAP(0x7e, XK_ssharp);
387	    }
388	    break;
389
390	case 'Y':		/* Italian */
391	    switch (chr = seven) {
392		MAP(0x23, XK_sterling);
393		MAP(0x40, XK_section);
394		MAP(0x5b, XK_degree);
395		MAP(0x5c, XK_ccedilla);
396		MAP(0x5d, XK_eacute);
397		MAP(0x60, XK_ugrave);
398		MAP(0x7b, XK_agrave);
399		MAP(0x7c, XK_ograve);
400		MAP(0x7d, XK_egrave);
401		MAP(0x7e, XK_igrave);
402	    }
403	    break;
404
405	case 'E':
406	case '6':		/* Norwegian/Danish */
407	    switch (chr = seven) {
408		MAP(0x40, XK_Adiaeresis);
409		MAP(0x5b, XK_AE);
410		MAP(0x5c, XK_Ooblique);
411		MAP(0x5d, XK_Aring);
412		MAP(0x5e, XK_Udiaeresis);
413		MAP(0x60, XK_adiaeresis);
414		MAP(0x7b, XK_ae);
415		MAP(0x7c, XK_oslash);
416		MAP(0x7d, XK_aring);
417		MAP(0x7e, XK_udiaeresis);
418	    }
419	    break;
420
421	case 'Z':		/* Spanish */
422	    switch (chr = seven) {
423		MAP(0x23, XK_sterling);
424		MAP(0x40, XK_section);
425		MAP(0x5b, XK_exclamdown);
426		MAP(0x5c, XK_Ntilde);
427		MAP(0x5d, XK_questiondown);
428		MAP(0x7b, XK_degree);
429		MAP(0x7c, XK_ntilde);
430		MAP(0x7d, XK_ccedilla);
431	    }
432	    break;
433
434	case 'H':
435	case '7':		/* Swedish */
436	    switch (chr = seven) {
437		MAP(0x40, XK_Eacute);
438		MAP(0x5b, XK_Adiaeresis);
439		MAP(0x5c, XK_Odiaeresis);
440		MAP(0x5d, XK_Aring);
441		MAP(0x5e, XK_Udiaeresis);
442		MAP(0x60, XK_eacute);
443		MAP(0x7b, XK_adiaeresis);
444		MAP(0x7c, XK_odiaeresis);
445		MAP(0x7d, XK_aring);
446		MAP(0x7e, XK_udiaeresis);
447	    }
448	    break;
449
450	case '=':		/* Swiss */
451	    switch (chr = seven) {
452		MAP(0x23, XK_ugrave);
453		MAP(0x40, XK_agrave);
454		MAP(0x5b, XK_eacute);
455		MAP(0x5c, XK_ccedilla);
456		MAP(0x5d, XK_ecircumflex);
457		MAP(0x5e, XK_icircumflex);
458		MAP(0x5f, XK_egrave);
459		MAP(0x60, XK_ocircumflex);
460		MAP(0x7b, XK_adiaeresis);
461		MAP(0x7c, XK_odiaeresis);
462		MAP(0x7d, XK_udiaeresis);
463		MAP(0x7e, XK_ucircumflex);
464	    }
465	    break;
466
467	default:		/* any character sets we don't recognize */
468	    count--;
469	    break;
470	}
471	/*
472	 * The state machine already treated DEL as a nonprinting and
473	 * nonspacing character.  If we have DEL now, simply render
474	 * it as a blank.
475	 */
476	if (chr == ANSI_DEL)
477	    chr = ' ';
478	*s = (IChar) A2E(chr);
479    }
480    TRACE(("%d\t%s\n",
481	   count,
482	   visibleIChar(buf, (unsigned) (ptr - buf))));
483    return count;
484#undef MAP
485}
486