charsets.c revision 2eaa94a1
1/* $XTermId: charsets.c,v 1.40 2008/05/26 22:49:57 tom Exp $ */
2
3/************************************************************
4
5Copyright 1998-2007,2008 by Thomas E. Dickey
6
7                        All Rights Reserved
8
9Permission is hereby granted, free of charge, to any person obtaining a
10copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice shall be included
18in all copies or substantial portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
24CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28Except as contained in this notice, the name(s) of the above copyright
29holders shall not be used in advertising or otherwise to promote the
30sale, use or other dealings in this Software without prior written
31authorization.
32
33********************************************************/
34
35#include <xterm.h>
36#include <data.h>
37#include <fontutils.h>
38
39#include <X11/keysym.h>
40
41/*
42 * This module performs translation as needed to support the DEC VT220 national
43 * replacement character sets.  We assume that xterm's font is based on the ISO
44 * 8859-1 (Latin 1) character set, which is almost the same as the DEC
45 * multinational character set.  Glyph positions 0-31 have to be the DEC
46 * graphic characters, though.
47 *
48 * References:
49 *	"VT220 Programmer Pocket Guide" EK-VT220-HR-002 (2nd ed., 1984), which
50 *		contains character charts for the national character sets.
51 *	"VT330/VT340 Programmer Reference Manual Volume 1: Text Programming"
52 *		EK-VT3XX-TP-001 (1st ed, 1987), which contains a table (2-1)
53 *		listing the glyphs which are mapped from the multinational
54 *		character set to the national character set.
55 *
56 * The latter reference, though easier to read, has a few errors and omissions.
57 */
58
59/*
60 * Translate an input keysym to the corresponding NRC keysym.
61 */
62unsigned
63xtermCharSetIn(unsigned code, int charset)
64{
65#define MAP(to, from) case from: code = to; break
66
67    if (code >= 128 && code < 256) {
68	switch (charset) {
69	case 'A':		/* United Kingdom set (or Latin 1)      */
70	    if (code == XK_sterling)
71		code = 0x23;
72	    code &= 0x7f;
73	    break;
74
75#if OPT_XMC_GLITCH
76	case '?':
77#endif
78	case '1':		/* Alternate Character ROM standard characters */
79	case '2':		/* Alternate Character ROM special graphics */
80	case 'B':		/* ASCII set                            */
81	    break;
82
83	case '0':		/* special graphics (line drawing)      */
84	    break;
85
86	case '4':		/* Dutch */
87	    switch (code) {
88		MAP(0x23, XK_sterling);
89		MAP(0x40, XK_threequarters);
90		MAP(0x5b, XK_ydiaeresis);
91		MAP(0x5c, XK_onehalf);
92		MAP(0x5d, XK_bar);	/* glyph is not ISO-8859-1 */
93		MAP(0x7b, XK_diaeresis);
94		MAP(0x7c, XK_f);	/* glyph is not ISO-8859-1 */
95		MAP(0x7d, XK_onequarter);
96		MAP(0x7e, XK_acute);
97	    }
98	    break;
99
100	case 'C':
101	case '5':		/* Finnish */
102	    switch (code) {
103		MAP(0x5b, XK_Adiaeresis);
104		MAP(0x5c, XK_Odiaeresis);
105		MAP(0x5d, XK_Aring);
106		MAP(0x5e, XK_Udiaeresis);
107		MAP(0x60, XK_eacute);
108		MAP(0x7b, XK_adiaeresis);
109		MAP(0x7c, XK_odiaeresis);
110		MAP(0x7d, XK_aring);
111		MAP(0x7e, XK_udiaeresis);
112	    }
113	    break;
114
115	case 'R':		/* French */
116	    switch (code) {
117		MAP(0x23, XK_sterling);
118		MAP(0x40, XK_agrave);
119		MAP(0x5b, XK_degree);
120		MAP(0x5c, XK_ccedilla);
121		MAP(0x5d, XK_section);
122		MAP(0x7b, XK_eacute);
123		MAP(0x7c, XK_ugrave);
124		MAP(0x7d, XK_egrave);
125		MAP(0x7e, XK_diaeresis);
126	    }
127	    break;
128
129	case 'Q':		/* French Canadian */
130	    switch (code) {
131		MAP(0x40, XK_agrave);
132		MAP(0x5b, XK_acircumflex);
133		MAP(0x5c, XK_ccedilla);
134		MAP(0x5d, XK_ecircumflex);
135		MAP(0x5e, XK_icircumflex);
136		MAP(0x60, XK_ocircumflex);
137		MAP(0x7b, XK_eacute);
138		MAP(0x7c, XK_ugrave);
139		MAP(0x7d, XK_egrave);
140		MAP(0x7e, XK_ucircumflex);
141	    }
142	    break;
143
144	case 'K':		/* German */
145	    switch (code) {
146		MAP(0x40, XK_section);
147		MAP(0x5b, XK_Adiaeresis);
148		MAP(0x5c, XK_Odiaeresis);
149		MAP(0x5d, XK_Udiaeresis);
150		MAP(0x7b, XK_adiaeresis);
151		MAP(0x7c, XK_odiaeresis);
152		MAP(0x7d, XK_udiaeresis);
153		MAP(0x7e, XK_ssharp);
154	    }
155	    break;
156
157	case 'Y':		/* Italian */
158	    switch (code) {
159		MAP(0x23, XK_sterling);
160		MAP(0x40, XK_section);
161		MAP(0x5b, XK_degree);
162		MAP(0x5c, XK_ccedilla);
163		MAP(0x5d, XK_eacute);
164		MAP(0x60, XK_ugrave);
165		MAP(0x7b, XK_agrave);
166		MAP(0x7c, XK_ograve);
167		MAP(0x7d, XK_egrave);
168		MAP(0x7e, XK_igrave);
169	    }
170	    break;
171
172	case 'E':
173	case '6':		/* Norwegian/Danish */
174	    switch (code) {
175		MAP(0x40, XK_Adiaeresis);
176		MAP(0x5b, XK_AE);
177		MAP(0x5c, XK_Ooblique);
178		MAP(0x5d, XK_Aring);
179		MAP(0x5e, XK_Udiaeresis);
180		MAP(0x60, XK_adiaeresis);
181		MAP(0x7b, XK_ae);
182		MAP(0x7c, XK_oslash);
183		MAP(0x7d, XK_aring);
184		MAP(0x7e, XK_udiaeresis);
185	    }
186	    break;
187
188	case 'Z':		/* Spanish */
189	    switch (code) {
190		MAP(0x23, XK_sterling);
191		MAP(0x40, XK_section);
192		MAP(0x5b, XK_exclamdown);
193		MAP(0x5c, XK_Ntilde);
194		MAP(0x5d, XK_questiondown);
195		MAP(0x7b, XK_degree);
196		MAP(0x7c, XK_ntilde);
197		MAP(0x7d, XK_ccedilla);
198	    }
199	    break;
200
201	case 'H':
202	case '7':		/* Swedish */
203	    switch (code) {
204		MAP(0x40, XK_Eacute);
205		MAP(0x5b, XK_Adiaeresis);
206		MAP(0x5c, XK_Odiaeresis);
207		MAP(0x5d, XK_Aring);
208		MAP(0x5e, XK_Udiaeresis);
209		MAP(0x60, XK_eacute);
210		MAP(0x7b, XK_adiaeresis);
211		MAP(0x7c, XK_odiaeresis);
212		MAP(0x7d, XK_aring);
213		MAP(0x7e, XK_udiaeresis);
214	    }
215	    break;
216
217	case '=':		/* Swiss */
218	    switch (code) {
219		MAP(0x23, XK_ugrave);
220		MAP(0x40, XK_agrave);
221		MAP(0x5b, XK_eacute);
222		MAP(0x5c, XK_ccedilla);
223		MAP(0x5d, XK_ecircumflex);
224		MAP(0x5e, XK_icircumflex);
225		MAP(0x5f, XK_egrave);
226		MAP(0x60, XK_ocircumflex);
227		MAP(0x7b, XK_adiaeresis);
228		MAP(0x7c, XK_odiaeresis);
229		MAP(0x7d, XK_udiaeresis);
230		MAP(0x7e, XK_ucircumflex);
231	    }
232	    break;
233
234	default:		/* any character sets we don't recognize */
235	    break;
236	}
237	code &= 0x7f;		/* NRC in any case is 7-bit */
238    }
239    return code;
240#undef MAP
241}
242
243/*
244 * Translate a string to the display form.  This assumes the font has the
245 * DEC graphic characters in cells 0-31, and otherwise is ISO-8859-1.
246 */
247int
248xtermCharSetOut(XtermWidget xw, IChar * buf, IChar * ptr, int leftset)
249{
250    IChar *s;
251    TScreen *screen = TScreenOf(xw);
252    int count = 0;
253    int rightset = screen->gsets[(int) (screen->curgr)];
254
255#define MAP(from, to) case from: chr = to; break
256
257    TRACE(("CHARSET GL=%c(G%d) GR=%c(G%d) SS%d\n\t%s\n",
258	   leftset, screen->curgl,
259	   rightset, screen->curgr,
260	   screen->curss,
261	   visibleIChar(buf, (unsigned) (ptr - buf))));
262
263    for (s = buf; s < ptr; ++s) {
264	int eight = CharOf(E2A(*s));
265	int seven = eight & 0x7f;
266	int cs = (eight >= 128) ? rightset : leftset;
267	int chr = eight;
268
269	count++;
270#if OPT_WIDE_CHARS
271	/*
272	 * This is only partly right - prevent inadvertant remapping of
273	 * the replacement character and other non-8bit codes into bogus
274	 * 8bit codes.
275	 */
276	if (screen->utf8_mode) {
277	    if (*s > 255)
278		continue;
279	}
280#endif
281	switch (cs) {
282	case 'A':		/* United Kingdom set (or Latin 1)      */
283	    if ((xw->flags & NATIONAL)
284		|| (screen->vtXX_level <= 1)) {
285		if (chr == 0x23) {
286#if OPT_WIDE_CHARS
287		    chr = (screen->utf8_mode
288			   ? 0xa3
289			   : XTERM_POUND);
290#else
291		    chr = XTERM_POUND;
292#endif
293		}
294	    } else {
295		chr = (seven | 0x80);
296	    }
297	    break;
298
299#if OPT_XMC_GLITCH
300	case '?':
301#endif
302	case '1':		/* Alternate Character ROM standard characters */
303	case '2':		/* Alternate Character ROM special graphics */
304	case 'B':		/* ASCII set                            */
305	    break;
306
307	case '0':		/* special graphics (line drawing)      */
308	    if (seven > 0x5f && seven <= 0x7e) {
309#if OPT_WIDE_CHARS
310		if (screen->utf8_mode)
311		    chr = dec2ucs((unsigned) (seven - 0x5f));
312		else
313#endif
314		    chr = seven - 0x5f;
315	    } else {
316		chr = seven;
317	    }
318	    break;
319
320	case '4':		/* Dutch */
321	    switch (chr = seven) {
322		MAP(0x23, XK_sterling);
323		MAP(0x40, XK_threequarters);
324		MAP(0x5b, XK_ydiaeresis);
325		MAP(0x5c, XK_onehalf);
326		MAP(0x5d, XK_bar);
327		MAP(0x7b, XK_diaeresis);
328		MAP(0x7c, XK_f);
329		MAP(0x7d, XK_onequarter);
330		MAP(0x7e, XK_acute);
331	    }
332	    break;
333
334	case 'C':
335	case '5':		/* Finnish */
336	    switch (chr = seven) {
337		MAP(0x5b, XK_Adiaeresis);
338		MAP(0x5c, XK_Odiaeresis);
339		MAP(0x5d, XK_Aring);
340		MAP(0x5e, XK_Udiaeresis);
341		MAP(0x60, XK_eacute);
342		MAP(0x7b, XK_adiaeresis);
343		MAP(0x7c, XK_odiaeresis);
344		MAP(0x7d, XK_aring);
345		MAP(0x7e, XK_udiaeresis);
346	    }
347	    break;
348
349	case 'R':		/* French */
350	    switch (chr = seven) {
351		MAP(0x23, XK_sterling);
352		MAP(0x40, XK_agrave);
353		MAP(0x5b, XK_degree);
354		MAP(0x5c, XK_ccedilla);
355		MAP(0x5d, XK_section);
356		MAP(0x7b, XK_eacute);
357		MAP(0x7c, XK_ugrave);
358		MAP(0x7d, XK_egrave);
359		MAP(0x7e, XK_diaeresis);
360	    }
361	    break;
362
363	case 'Q':		/* French Canadian */
364	    switch (chr = seven) {
365		MAP(0x40, XK_agrave);
366		MAP(0x5b, XK_acircumflex);
367		MAP(0x5c, XK_ccedilla);
368		MAP(0x5d, XK_ecircumflex);
369		MAP(0x5e, XK_icircumflex);
370		MAP(0x60, XK_ocircumflex);
371		MAP(0x7b, XK_eacute);
372		MAP(0x7c, XK_ugrave);
373		MAP(0x7d, XK_egrave);
374		MAP(0x7e, XK_ucircumflex);
375	    }
376	    break;
377
378	case 'K':		/* German */
379	    switch (chr = seven) {
380		MAP(0x40, XK_section);
381		MAP(0x5b, XK_Adiaeresis);
382		MAP(0x5c, XK_Odiaeresis);
383		MAP(0x5d, XK_Udiaeresis);
384		MAP(0x7b, XK_adiaeresis);
385		MAP(0x7c, XK_odiaeresis);
386		MAP(0x7d, XK_udiaeresis);
387		MAP(0x7e, XK_ssharp);
388	    }
389	    break;
390
391	case 'Y':		/* Italian */
392	    switch (chr = seven) {
393		MAP(0x23, XK_sterling);
394		MAP(0x40, XK_section);
395		MAP(0x5b, XK_degree);
396		MAP(0x5c, XK_ccedilla);
397		MAP(0x5d, XK_eacute);
398		MAP(0x60, XK_ugrave);
399		MAP(0x7b, XK_agrave);
400		MAP(0x7c, XK_ograve);
401		MAP(0x7d, XK_egrave);
402		MAP(0x7e, XK_igrave);
403	    }
404	    break;
405
406	case 'E':
407	case '6':		/* Norwegian/Danish */
408	    switch (chr = seven) {
409		MAP(0x40, XK_Adiaeresis);
410		MAP(0x5b, XK_AE);
411		MAP(0x5c, XK_Ooblique);
412		MAP(0x5d, XK_Aring);
413		MAP(0x5e, XK_Udiaeresis);
414		MAP(0x60, XK_adiaeresis);
415		MAP(0x7b, XK_ae);
416		MAP(0x7c, XK_oslash);
417		MAP(0x7d, XK_aring);
418		MAP(0x7e, XK_udiaeresis);
419	    }
420	    break;
421
422	case 'Z':		/* Spanish */
423	    switch (chr = seven) {
424		MAP(0x23, XK_sterling);
425		MAP(0x40, XK_section);
426		MAP(0x5b, XK_exclamdown);
427		MAP(0x5c, XK_Ntilde);
428		MAP(0x5d, XK_questiondown);
429		MAP(0x7b, XK_degree);
430		MAP(0x7c, XK_ntilde);
431		MAP(0x7d, XK_ccedilla);
432	    }
433	    break;
434
435	case 'H':
436	case '7':		/* Swedish */
437	    switch (chr = seven) {
438		MAP(0x40, XK_Eacute);
439		MAP(0x5b, XK_Adiaeresis);
440		MAP(0x5c, XK_Odiaeresis);
441		MAP(0x5d, XK_Aring);
442		MAP(0x5e, XK_Udiaeresis);
443		MAP(0x60, XK_eacute);
444		MAP(0x7b, XK_adiaeresis);
445		MAP(0x7c, XK_odiaeresis);
446		MAP(0x7d, XK_aring);
447		MAP(0x7e, XK_udiaeresis);
448	    }
449	    break;
450
451	case '=':		/* Swiss */
452	    switch (chr = seven) {
453		MAP(0x23, XK_ugrave);
454		MAP(0x40, XK_agrave);
455		MAP(0x5b, XK_eacute);
456		MAP(0x5c, XK_ccedilla);
457		MAP(0x5d, XK_ecircumflex);
458		MAP(0x5e, XK_icircumflex);
459		MAP(0x5f, XK_egrave);
460		MAP(0x60, XK_ocircumflex);
461		MAP(0x7b, XK_adiaeresis);
462		MAP(0x7c, XK_odiaeresis);
463		MAP(0x7d, XK_udiaeresis);
464		MAP(0x7e, XK_ucircumflex);
465	    }
466	    break;
467
468	default:		/* any character sets we don't recognize */
469	    count--;
470	    break;
471	}
472	/*
473	 * The state machine already treated DEL as a nonprinting and
474	 * nonspacing character.  If we have DEL now, simply render
475	 * it as a blank.
476	 */
477	if (chr == ANSI_DEL)
478	    chr = ' ';
479	*s = A2E(chr);
480    }
481    TRACE(("%d\t%s\n",
482	   count,
483	   visibleIChar(buf, (unsigned) (ptr - buf))));
484    return count;
485#undef MAP
486}
487