other.c revision a8fdb4bc
1/*
2Copyright (c) 2002 by Tomohiro KUBOTA
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22/* $XFree86: xc/programs/luit/other.c,v 1.1 2002/10/17 01:06:09 dawes Exp $ */
23
24#include <stdlib.h>
25#include <stdio.h>
26#include <string.h>
27#include <ctype.h>
28#include <X11/fonts/fontenc.h>
29#include "other.h"
30#include "charset.h"
31
32#ifndef NULL
33#define NULL 0
34#endif
35
36#define EURO_10646 0x20AC
37
38int
39init_gbk(OtherStatePtr s)
40{
41    s->gbk.mapping =
42        FontEncMapFind("gbk-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
43    if(!s->gbk.mapping) return 0;
44
45    s->gbk.reverse = FontMapReverse(s->gbk.mapping);
46    if(!s->gbk.reverse) return 0;
47
48    s->gbk.buf = -1;
49    return 1;
50}
51
52unsigned int
53mapping_gbk(unsigned int n, OtherStatePtr s)
54{
55    unsigned int r;
56    if(n < 128) return n;
57    if(n == 128) return EURO_10646;
58    r = FontEncRecode(n, s->gbk.mapping);
59    return r;
60}
61
62unsigned int
63reverse_gbk(unsigned int n, OtherStatePtr s)
64{
65    if(n < 128) return n;
66    if(n == EURO_10646) return 128;
67    return s->gbk.reverse->reverse(n, s->gbk.reverse->data);
68}
69
70int
71stack_gbk(unsigned char c, OtherStatePtr s)
72{
73    if(s->gbk.buf < 0) {
74        if(c < 129) return c;
75        s->gbk.buf = c;
76	return -1;
77    } else {
78        int b;
79        if(c < 0x40 || c == 0x7F) {
80            s->gbk.buf = -1;
81            return c;
82        }
83        if(s->gbk.buf < 0xFF && c < 0xFF)
84            b = (s->gbk.buf << 8) + c;
85        else
86            b = -1;
87        s->gbk.buf = -1;
88        return b;
89    }
90}
91
92int
93init_utf8(OtherStatePtr s)
94{
95    s->utf8.buf_ptr = 0;
96    return 1;
97}
98
99unsigned int
100mapping_utf8(unsigned int n, OtherStatePtr s)
101{
102    return n;
103}
104
105unsigned int
106reverse_utf8(unsigned int n, OtherStatePtr s)
107{
108    if(n < 0x80)
109        return n;
110    if(n < 0x800)
111        return 0xC080 + ((n&0x7C0)<<2) + (n&0x3F);
112    if(n < 0x10000)
113        return 0xE08080 + ((n&0xF000)<<4) + ((n&0xFC0)<<2) + (n&0x3F);
114    return 0xF0808080 + ((n&0x1C0000)<<6) + ((n&0x3F000)<<4) +
115           ((n&0xFC0)<<2) + (n&0x3F);
116}
117
118int
119stack_utf8(unsigned char c, OtherStatePtr s)
120{
121    int u;
122
123    if(c < 0x80) {
124        s->utf8.buf_ptr = 0;
125        return c;
126    }
127    if(s->utf8.buf_ptr == 0) {
128        if((c & 0x40) == 0) return -1;
129        s->utf8.buf[s->utf8.buf_ptr++] = c;
130        if((c & 0x60) == 0x40) s->utf8.len = 2;
131        else if((c & 0x70) == 0x60) s->utf8.len = 3;
132        else if((c & 0x78) == 0x70) s->utf8.len = 4;
133        else s->utf8.buf_ptr = 0;
134        return -1;
135    }
136    if((c & 0x40) != 0) {
137        s->utf8.buf_ptr = 0;
138        return -1;
139    }
140    s->utf8.buf[s->utf8.buf_ptr++] = c;
141    if(s->utf8.buf_ptr < s->utf8.len) return -1;
142    switch(s->utf8.len) {
143    case 2:
144        u = ((s->utf8.buf[0] & 0x1F) << 6) | (s->utf8.buf[1] & 0x3F);
145        s->utf8.buf_ptr = 0;
146        if(u < 0x80) return -1; else return u;
147    case 3:
148        u = ((s->utf8.buf[0] & 0x0F) << 12)
149            | ((s->utf8.buf[1] & 0x3F) << 6)
150            | (s->utf8.buf[2] & 0x3F);
151        s->utf8.buf_ptr = 0;
152        if(u < 0x800) return -1; else return u;
153    case 4:
154        u = ((s->utf8.buf[0] & 0x03) << 18)
155            | ((s->utf8.buf[1] & 0x3F) << 12)
156            | ((s->utf8.buf[2] & 0x3F) << 6)
157            | ((s->utf8.buf[3] & 0x3F));
158        s->utf8.buf_ptr = 0;
159        if(u < 0x10000) return -1; else return u;
160    }
161    s->utf8.buf_ptr = 0;
162    return -1;
163}
164
165
166#define HALFWIDTH_10646 0xFF61
167#define YEN_SJIS 0x5C
168#define YEN_10646 0x00A5
169#define OVERLINE_SJIS 0x7E
170#define OVERLINE_10646 0x203E
171
172int
173init_sjis(OtherStatePtr s)
174{
175    s->sjis.x0208mapping =
176        FontEncMapFind("jisx0208.1990-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
177    if(!s->sjis.x0208mapping) return 0;
178
179    s->sjis.x0208reverse = FontMapReverse(s->sjis.x0208mapping);
180    if(!s->sjis.x0208reverse) return 0;
181
182    s->sjis.x0201mapping =
183        FontEncMapFind("jisx0201.1976-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
184    if(!s->sjis.x0201mapping) return 0;
185
186    s->sjis.x0201reverse = FontMapReverse(s->sjis.x0201mapping);
187    if(!s->sjis.x0201reverse) return 0;
188
189    s->sjis.buf = -1;
190    return 1;
191}
192
193unsigned int
194mapping_sjis(unsigned int n, OtherStatePtr s)
195{
196    unsigned int j1, j2, s1, s2;
197    if(n == YEN_SJIS) return YEN_10646;
198    if(n == OVERLINE_SJIS) return OVERLINE_10646;
199    if(n < 0x80) return n;
200    if(n >= 0xA0 && n <= 0xDF) return FontEncRecode(n, s->sjis.x0201mapping);
201    s1 = ((n>>8)&0xFF);
202    s2 = (n&0xFF);
203    j1 = (s1 << 1) - (s1 <= 0x9F ? 0xE0 : 0x160) - (s2 < 0x9F ? 1 : 0);
204    j2 = s2 - 0x1F - (s2 >= 0x7F ? 1 : 0) - (s2 >= 0x9F ? 0x5E : 0);
205    return FontEncRecode((j1<<8) + j2, s->sjis.x0208mapping);
206}
207
208unsigned int
209reverse_sjis(unsigned int n, OtherStatePtr s)
210{
211    unsigned int j, j1, j2, s1, s2;
212    if(n == YEN_10646) return YEN_SJIS;
213    if(n == OVERLINE_10646) return OVERLINE_SJIS;
214    if(n < 0x80) return n;
215    if(n >= HALFWIDTH_10646)
216        return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data);
217    j = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data);
218    j1 = ((j>>8)&0xFF);
219    j2 = (j&0xFF);
220    s1 = ((j1 - 1) >> 1) + ((j1 <= 0x5E) ? 0x71 : 0xB1);
221    s2 = j2 + ((j1 & 1) ? ((j2 < 0x60) ? 0x1F : 0x20) : 0x7E);
222    return (s1<<8) + s2;
223}
224
225int
226stack_sjis(unsigned char c, OtherStatePtr s)
227{
228    if(s->sjis.buf < 0) {
229        if(c < 128 || (c >= 0xA0 && c <= 0xDF)) return c;
230        s->sjis.buf = c;
231	return -1;
232    } else {
233        int b;
234        if(c < 0x40 || c == 0x7F) {
235            s->sjis.buf = -1;
236            return c;
237        }
238        if(s->sjis.buf < 0xFF && c < 0xFF)
239            b = (s->sjis.buf << 8) + c;
240        else
241            b = -1;
242        s->sjis.buf = -1;
243        return b;
244    }
245}
246
247int
248init_hkscs(OtherStatePtr s)
249{
250    s->hkscs.mapping =
251        FontEncMapFind("big5hkscs-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
252    if(!s->hkscs.mapping) return 0;
253
254    s->hkscs.reverse = FontMapReverse(s->hkscs.mapping);
255    if(!s->hkscs.reverse) return 0;
256
257    s->hkscs.buf = -1;
258    return 1;
259}
260
261unsigned int
262mapping_hkscs(unsigned int n, OtherStatePtr s)
263{
264    unsigned int r;
265    if(n < 128) return n;
266    if(n == 128) return EURO_10646;
267    r = FontEncRecode(n, s->hkscs.mapping);
268    return r;
269}
270
271unsigned int
272reverse_hkscs(unsigned int n, OtherStatePtr s)
273{
274    if(n < 128) return n;
275    if(n == EURO_10646) return 128;
276    return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data);
277}
278
279int
280stack_hkscs(unsigned char c, OtherStatePtr s)
281{
282    if(s->hkscs.buf < 0) {
283        if(c < 129) return c;
284        s->hkscs.buf = c;
285	return -1;
286    } else {
287        int b;
288        if(c < 0x40 || c == 0x7F) {
289            s->hkscs.buf = -1;
290            return c;
291        }
292        if(s->hkscs.buf < 0xFF && c < 0xFF)
293            b = (s->hkscs.buf << 8) + c;
294        else
295            b = -1;
296        s->hkscs.buf = -1;
297        return b;
298    }
299}
300
301
/*
 *  GB18030 characters are 1 to 4 bytes long, so (besides the single-byte
 *  Latin range) the charset encoding is split into 2 subsets:
 *    - the 2-byte characters are defined in gb18030.2000-0
 *    - the 4-byte characters are defined in gb18030.2000-1
 *  Please note that the mapping in 2000-1 is not a mapping from a 4-byte
 *  sequence to a 2-byte value.
 *  To use 2000-1 we need to 'linearize' the 4-byte sequence into a single
 *  index and then look up the Unicode value with that index.
 *
 *  For more info on the GB18030 standard please check:
 *    http://oss.software.ibm.com/icu/docs/papers/gb18030.html
 *
 *  For more info on GB18030 implementation issues in XFree86 please check:
 *    http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1
 */
318int
319init_gb18030(OtherStatePtr s)
320{
321    s->gb18030.cs0_mapping =
322        FontEncMapFind("gb18030.2000-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
323    if(!s->gb18030.cs0_mapping) return 0;
324
325    s->gb18030.cs0_reverse = FontMapReverse(s->gb18030.cs0_mapping);
326    if(!s->gb18030.cs0_reverse) return 0;
327
328    s->gb18030.cs1_mapping =
329        FontEncMapFind("gb18030.2000-1", FONT_ENCODING_UNICODE, -1, -1, NULL);
330    if(!s->gb18030.cs1_mapping) return 0;
331
332    s->gb18030.cs1_reverse = FontMapReverse(s->gb18030.cs1_mapping);
333    if(!s->gb18030.cs1_reverse) return 0;
334
335    s->gb18030.linear  = 0;
336    s->gb18030.buf_ptr = 0;
337    return 1;
338}
339
340unsigned int
341mapping_gb18030(unsigned int n, OtherStatePtr s)
342{
343    if(n <= 0x80)   return n;       /* 0x80 is valid but unassigned codepoint */
344    if(n >= 0xFFFF) return '?';
345
346    return FontEncRecode(n,
347            (s->gb18030.linear)?s->gb18030.cs1_mapping:s->gb18030.cs0_mapping);
348}
349
350unsigned int
351reverse_gb18030(unsigned int n, OtherStatePtr s)
352{
353    /* when lookup in 2000-0 failed. */
354    /* lookup in 2000-1 and then try to unlinear'd */
355    unsigned int r;
356    if(n <= 0x80) return n;
357
358    r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data);
359    if (r != 0)
360        return r;
361
362    r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data);
363    if (r != 0) {
364        unsigned char bytes[4];
365
366        bytes[3] = 0x30 + r % 10;   r /= 10;
367        bytes[2] = 0x81 + r % 126;  r /= 126;
368        bytes[1] = 0x30 + r % 10;   r /= 10;
369        bytes[0] = 0x81 + r;
370
371        r  = (unsigned int)bytes[0] << 24;
372        r |= (unsigned int)bytes[1] << 16;
373        r |= (unsigned int)bytes[2] << 8;
374        r |= (unsigned int)bytes[3];
375    }
376    return r;
377}
378
379int
380stack_gb18030(unsigned char c, OtherStatePtr s)
381{
382    /* if set gb18030.linear => True. the return value is "linear'd" */
383    if(s->gb18030.buf_ptr == 0) {
384        if(c <= 0x80) return c;
385        if (c == 0xFF) return -1;
386        s->gb18030.linear = 0;
387        s->gb18030.buf[s->gb18030.buf_ptr++] = c;
388        return -1;
389    } else if (s->gb18030.buf_ptr == 1) {
390        if (c >= 0x40) {
391            s->gb18030.buf_ptr = 0;
392            if ((c == 0x80) || (c == 0xFF))
393                return -1;
394            else
395                return (s->gb18030.buf[0] << 8) + c;
396        } else if (c >= 30) {   /* 2Byte is (0x30 -> 0x39) */
397            s->gb18030.buf[s->gb18030.buf_ptr++] = c;
398            return -1;
399        } else {
400            s->gb18030.buf_ptr = 0;
401            return c;
402        }
403    } else if (s->gb18030.buf_ptr == 2) {
404        if ((c >= 0x81) && (c <= 0xFE)) {
405            s->gb18030.buf[s->gb18030.buf_ptr++] = c;
406            return -1;
407        } else {
408            s->gb18030.buf_ptr = 0;
409            return c;
410        }
411    } else {
412        int r = 0;
413        s->gb18030.buf_ptr = 0;
414        if ((c >= 0x30) && (c <= 0x39)) {
415            s->gb18030.linear = 1;
416            r = (((s->gb18030.buf[0] - 0x81) * 10
417                        + (s->gb18030.buf[1] - 0x30)) * 126
418                    + (s->gb18030.buf[2] - 0x81)) * 10
419                + (c - 0x30);
420            return r;
421        }
422        return -1;
423    }
424}
425
426