other.c revision 77683534
1/*
2Copyright (c) 2002 by Tomohiro KUBOTA
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
23#ifdef HAVE_CONFIG_H
24# include "config.h"
25#endif
26
27#include <stdlib.h>
28#include <stdio.h>
29#include <string.h>
30#include <ctype.h>
31#include "other.h"
32
33#ifndef NULL
34#define NULL 0
35#endif
36
37#define EURO_10646 0x20AC
38
39int
40init_gbk(OtherStatePtr s)
41{
42    s->gbk.mapping =
43	FontEncMapFind("gbk-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
44    if (!s->gbk.mapping)
45	return 0;
46
47    s->gbk.reverse = FontMapReverse(s->gbk.mapping);
48    if (!s->gbk.reverse)
49	return 0;
50
51    s->gbk.buf = -1;
52    return 1;
53}
54
55unsigned int
56mapping_gbk(unsigned int n, OtherStatePtr s)
57{
58    unsigned int r;
59    if (n < 128)
60	return n;
61    if (n == 128)
62	return EURO_10646;
63    r = FontEncRecode(n, s->gbk.mapping);
64    return r;
65}
66
67unsigned int
68reverse_gbk(unsigned int n, OtherStatePtr s)
69{
70    if (n < 128)
71	return n;
72    if (n == EURO_10646)
73	return 128;
74    return s->gbk.reverse->reverse(n, s->gbk.reverse->data);
75}
76
77int
78stack_gbk(unsigned c, OtherStatePtr s)
79{
80    if (s->gbk.buf < 0) {
81	if (c < 129)
82	    return (int) c;
83	s->gbk.buf = (int) c;
84	return -1;
85    } else {
86	int b;
87	if (c < 0x40 || c == 0x7F) {
88	    s->gbk.buf = -1;
89	    return (int) c;
90	}
91	if (s->gbk.buf < 0xFF && c < 0xFF)
92	    b = (int) ((unsigned) (s->gbk.buf << 8) + c);
93	else
94	    b = -1;
95	s->gbk.buf = -1;
96	return b;
97    }
98}
99
100int
101init_utf8(OtherStatePtr s)
102{
103    s->utf8.buf_ptr = 0;
104    return 1;
105}
106
107unsigned int
108mapping_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED)
109{
110    return n;
111}
112
113unsigned int
114reverse_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED)
115{
116    if (n < 0x80)
117	return n;
118    if (n < 0x800)
119	return 0xC080 + ((n & 0x7C0) << 2) + (n & 0x3F);
120    if (n < 0x10000)
121	return 0xE08080 + ((n & 0xF000) << 4) + ((n & 0xFC0) << 2) + (n & 0x3F);
122    return 0xF0808080 + ((n & 0x1C0000) << 6) + ((n & 0x3F000) << 4) +
123	((n & 0xFC0) << 2) + (n & 0x3F);
124}
125
126int
127stack_utf8(unsigned c, OtherStatePtr s)
128{
129    int u;
130
131    if (c < 0x80) {
132	s->utf8.buf_ptr = 0;
133	return (int) c;
134    }
135    if (s->utf8.buf_ptr == 0) {
136	if ((c & 0x40) == 0)
137	    return -1;
138	s->utf8.buf[s->utf8.buf_ptr++] = UChar(c);
139	if ((c & 0x60) == 0x40)
140	    s->utf8.len = 2;
141	else if ((c & 0x70) == 0x60)
142	    s->utf8.len = 3;
143	else if ((c & 0x78) == 0x70)
144	    s->utf8.len = 4;
145	else
146	    s->utf8.buf_ptr = 0;
147	return -1;
148    }
149    if ((c & 0x40) != 0) {
150	s->utf8.buf_ptr = 0;
151	return -1;
152    }
153    s->utf8.buf[s->utf8.buf_ptr++] = UChar(c);
154    if (s->utf8.buf_ptr < s->utf8.len)
155	return -1;
156    switch (s->utf8.len) {
157    case 2:
158	u = ((s->utf8.buf[0] & 0x1F) << 6) | (s->utf8.buf[1] & 0x3F);
159	s->utf8.buf_ptr = 0;
160	if (u < 0x80)
161	    return -1;
162	else
163	    return u;
164    case 3:
165	u = ((s->utf8.buf[0] & 0x0F) << 12)
166	    | ((s->utf8.buf[1] & 0x3F) << 6)
167	    | (s->utf8.buf[2] & 0x3F);
168	s->utf8.buf_ptr = 0;
169	if (u < 0x800)
170	    return -1;
171	else
172	    return u;
173    case 4:
174	u = ((s->utf8.buf[0] & 0x03) << 18)
175	    | ((s->utf8.buf[1] & 0x3F) << 12)
176	    | ((s->utf8.buf[2] & 0x3F) << 6)
177	    | ((s->utf8.buf[3] & 0x3F));
178	s->utf8.buf_ptr = 0;
179	if (u < 0x10000)
180	    return -1;
181	else
182	    return u;
183    }
184    s->utf8.buf_ptr = 0;
185    return -1;
186}
187
188#define HALFWIDTH_10646 0xFF61
189#define YEN_SJIS 0x5C
190#define YEN_10646 0x00A5
191#define OVERLINE_SJIS 0x7E
192#define OVERLINE_10646 0x203E
193
194int
195init_sjis(OtherStatePtr s)
196{
197    s->sjis.x0208mapping =
198	FontEncMapFind("jisx0208.1990-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
199    if (!s->sjis.x0208mapping)
200	return 0;
201
202    s->sjis.x0208reverse = FontMapReverse(s->sjis.x0208mapping);
203    if (!s->sjis.x0208reverse)
204	return 0;
205
206    s->sjis.x0201mapping =
207	FontEncMapFind("jisx0201.1976-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
208    if (!s->sjis.x0201mapping)
209	return 0;
210
211    s->sjis.x0201reverse = FontMapReverse(s->sjis.x0201mapping);
212    if (!s->sjis.x0201reverse)
213	return 0;
214
215    s->sjis.buf = -1;
216    return 1;
217}
218
219unsigned int
220mapping_sjis(unsigned int n, OtherStatePtr s)
221{
222    unsigned int j1, j2, s1, s2;
223    if (n == YEN_SJIS)
224	return YEN_10646;
225    if (n == OVERLINE_SJIS)
226	return OVERLINE_10646;
227    if (n < 0x80)
228	return n;
229    if (n >= 0xA0 && n <= 0xDF)
230	return FontEncRecode(n, s->sjis.x0201mapping);
231    s1 = ((n >> 8) & 0xFF);
232    s2 = (n & 0xFF);
233    j1 = (s1 << 1)
234	- (unsigned) (s1 <= 0x9F ? 0xE0 : 0x160)
235	- (unsigned) (s2 < 0x9F ? 1 : 0);
236    j2 = s2
237	- 0x1F
238	- (unsigned) (s2 >= 0x7F ? 1 : 0)
239	- (unsigned) (s2 >= 0x9F ? 0x5E : 0);
240    return FontEncRecode((j1 << 8) + j2, s->sjis.x0208mapping);
241}
242
243unsigned int
244reverse_sjis(unsigned int n, OtherStatePtr s)
245{
246    unsigned int j, j1, j2, s1, s2;
247    if (n == YEN_10646)
248	return YEN_SJIS;
249    if (n == OVERLINE_10646)
250	return OVERLINE_SJIS;
251    if (n < 0x80)
252	return n;
253    if (n >= HALFWIDTH_10646)
254	return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data);
255    j = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data);
256    j1 = ((j >> 8) & 0xFF);
257    j2 = (j & 0xFF);
258    s1 = ((j1 - 1) >> 1)
259	+ (unsigned) ((j1 <= 0x5E) ? 0x71 : 0xB1);
260    s2 = j2
261	+ (unsigned) ((j1 & 1) ? ((j2 < 0x60) ? 0x1F : 0x20) : 0x7E);
262    return (s1 << 8) + s2;
263}
264
265int
266stack_sjis(unsigned c, OtherStatePtr s)
267{
268    if (s->sjis.buf < 0) {
269	if (c < 128 || (c >= 0xA0 && c <= 0xDF))
270	    return (int) c;
271	s->sjis.buf = (int) c;
272	return -1;
273    } else {
274	int b;
275	if (c < 0x40 || c == 0x7F) {
276	    s->sjis.buf = -1;
277	    return (int) c;
278	}
279	if (s->sjis.buf < 0xFF && c < 0xFF)
280	    b = (int) ((unsigned) (s->sjis.buf << 8) + c);
281	else
282	    b = -1;
283	s->sjis.buf = -1;
284	return b;
285    }
286}
287
288int
289init_hkscs(OtherStatePtr s)
290{
291    s->hkscs.mapping =
292	FontEncMapFind("big5hkscs-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
293    if (!s->hkscs.mapping)
294	return 0;
295
296    s->hkscs.reverse = FontMapReverse(s->hkscs.mapping);
297    if (!s->hkscs.reverse)
298	return 0;
299
300    s->hkscs.buf = -1;
301    return 1;
302}
303
304unsigned int
305mapping_hkscs(unsigned int n, OtherStatePtr s)
306{
307    unsigned int r;
308    if (n < 128)
309	return n;
310    if (n == 128)
311	return EURO_10646;
312    r = FontEncRecode(n, s->hkscs.mapping);
313    return r;
314}
315
316unsigned int
317reverse_hkscs(unsigned int n, OtherStatePtr s)
318{
319    if (n < 128)
320	return n;
321    if (n == EURO_10646)
322	return 128;
323    return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data);
324}
325
326int
327stack_hkscs(unsigned c, OtherStatePtr s)
328{
329    if (s->hkscs.buf < 0) {
330	if (c < 129)
331	    return (int) c;
332	s->hkscs.buf = (int) c;
333	return -1;
334    } else {
335	int b;
336	if (c < 0x40 || c == 0x7F) {
337	    s->hkscs.buf = -1;
338	    return (int) c;
339	}
340	if (s->hkscs.buf < 0xFF && c < 0xFF)
341	    b = (int) ((unsigned) (s->hkscs.buf << 8) + c);
342	else
343	    b = -1;
344	s->hkscs.buf = -1;
345	return b;
346    }
347}
348
/*
 *  GB18030 characters are encoded in one to four bytes, so the character
 *  set is split into two subsets (besides Latin):
 *    - the two-byte characters are defined in gb18030.2000-0;
 *    - the four-byte characters are defined in gb18030.2000-1.
 *  Please note that the mapping in 2000-1 is not a mapping from a four-byte
 *  sequence to a two-byte value.  To use 2000-1, the four-byte sequence
 *  must first be "linearized" into an index, which is then looked up to
 *  obtain the Unicode value.
 *
 *  For more information on the GB18030 standard, see:
 *    http://oss.software.ibm.com/icu/docs/papers/gb18030.html
 *
 *  For more information on GB18030 implementation issues in XFree86, see:
 *    http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1
 */
365int
366init_gb18030(OtherStatePtr s)
367{
368    s->gb18030.cs0_mapping =
369	FontEncMapFind("gb18030.2000-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
370    if (!s->gb18030.cs0_mapping)
371	return 0;
372
373    s->gb18030.cs0_reverse = FontMapReverse(s->gb18030.cs0_mapping);
374    if (!s->gb18030.cs0_reverse)
375	return 0;
376
377    s->gb18030.cs1_mapping =
378	FontEncMapFind("gb18030.2000-1", FONT_ENCODING_UNICODE, -1, -1, NULL);
379    if (!s->gb18030.cs1_mapping)
380	return 0;
381
382    s->gb18030.cs1_reverse = FontMapReverse(s->gb18030.cs1_mapping);
383    if (!s->gb18030.cs1_reverse)
384	return 0;
385
386    s->gb18030.linear = 0;
387    s->gb18030.buf_ptr = 0;
388    return 1;
389}
390
391unsigned int
392mapping_gb18030(unsigned int n, OtherStatePtr s)
393{
394    if (n <= 0x80)
395	return n;		/* 0x80 is valid but unassigned codepoint */
396    if (n >= 0xFFFF)
397	return '?';
398
399    return FontEncRecode(n,
400			 (s->gb18030.linear) ? s->gb18030.cs1_mapping : s->gb18030.cs0_mapping);
401}
402
403unsigned int
404reverse_gb18030(unsigned int n, OtherStatePtr s)
405{
406    /* when lookup in 2000-0 failed. */
407    /* lookup in 2000-1 and then try to unlinear'd */
408    unsigned int r;
409    if (n <= 0x80)
410	return n;
411
412    r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data);
413    if (r != 0)
414	return r;
415
416    r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data);
417    if (r != 0) {
418	unsigned char bytes[4];
419
420	bytes[3] = UChar(0x30 + r % 10);
421	r /= 10;
422	bytes[2] = UChar(0x81 + r % 126);
423	r /= 126;
424	bytes[1] = UChar(0x30 + r % 10);
425	r /= 10;
426	bytes[0] = UChar(0x81 + r);
427
428	r = (unsigned int) bytes[0] << 24;
429	r |= (unsigned int) bytes[1] << 16;
430	r |= (unsigned int) bytes[2] << 8;
431	r |= (unsigned int) bytes[3];
432    }
433    return r;
434}
435
436int
437stack_gb18030(unsigned c, OtherStatePtr s)
438{
439    /* if set gb18030.linear => True. the return value is "linear'd" */
440    if (s->gb18030.buf_ptr == 0) {
441	if (c <= 0x80)
442	    return (int) c;
443	if (c == 0xFF)
444	    return -1;
445	s->gb18030.linear = 0;
446	s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
447	return -1;
448    } else if (s->gb18030.buf_ptr == 1) {
449	if (c >= 0x40) {
450	    s->gb18030.buf_ptr = 0;
451	    if ((c == 0x80) || (c == 0xFF))
452		return -1;
453	    else
454		return (int) ((unsigned) (s->gb18030.buf[0] << 8) + c);
455	} else if (c >= 30) {	/* 2Byte is (0x30 -> 0x39) */
456	    s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
457	    return -1;
458	} else {
459	    s->gb18030.buf_ptr = 0;
460	    return (int) c;
461	}
462    } else if (s->gb18030.buf_ptr == 2) {
463	if ((c >= 0x81) && (c <= 0xFE)) {
464	    s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
465	    return -1;
466	} else {
467	    s->gb18030.buf_ptr = 0;
468	    return (int) c;
469	}
470    } else {
471	int r = 0;
472	s->gb18030.buf_ptr = 0;
473	if ((c >= 0x30) && (c <= 0x39)) {
474	    s->gb18030.linear = 1;
475	    r = (((s->gb18030.buf[0] - 0x81) * 10
476		  + (s->gb18030.buf[1] - 0x30)) * 126
477		 + (s->gb18030.buf[2] - 0x81)) * 10
478		+ ((int) c - 0x30);
479	    return r;
480	}
481	return -1;
482    }
483}
484