gettext.c revision 1.3 1 /* $NetBSD: gettext.c,v 1.3 2000/10/31 11:08:18 itojun Exp $ */
2
3 /*-
4 * Copyright (c) 2000 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: gettext.c,v 1.3 2000/10/31 11:08:18 itojun Exp $");
32 #endif /* LIBC_SCCS and not lint */
33
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <sys/uio.h>
39
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #if 0
46 #include <util.h>
47 #endif
48 #include <libintl.h>
49 #include <locale.h>
50 #include "libintl_local.h"
51 #include "pathnames.h"
52
53 static struct mohandle mohandle;
54
55 static const char *lookup_category __P((int));
56 static const char *split_locale __P((const char *));
57 static const char *lookup_mofile __P((char *, size_t, const char *,
58 char *, const char *, const char *));
59 static u_int32_t flip __P((u_int32_t, u_int32_t));
60 static int validate __P((void *));
61 static int mapit __P((const char *));
62 static int unmapit __P((void));
63 static const char *lookup_hash __P((const char *));
64 static const char *lookup_bsearch __P((const char *));
65 static const char *lookup __P((const char *));
66
67 /*
68 * shortcut functions. the main implementation resides in dcngettext().
69 */
/*
 * gettext() -
 *	translate msgid.  forwards to dcngettext() with a NULL domain name
 *	(so dcngettext() picks its default domain), no plural form (n is
 *	fixed to 1, which selects msgid) and the LC_MESSAGES category.
 */
char *
gettext(const char *msgid)
{

	return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
}
77
/*
 * dgettext() -
 *	translate msgid in the given domain.  forwards to dcngettext() with
 *	no plural form (n is fixed to 1) and the LC_MESSAGES category.
 */
char *
dgettext(const char *domainname, const char *msgid)
{

	return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
}
86
/*
 * dcgettext() -
 *	translate msgid in the given domain and locale category.  forwards
 *	to dcngettext() with no plural form (n is fixed to 1).
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{

	return dcngettext(domainname, msgid, NULL, 1UL, category);
}
96
/*
 * ngettext() -
 *	translate msgid1 or msgid2, chosen by n inside dcngettext().
 *	uses a NULL domain name (dcngettext() picks its default domain)
 *	and the LC_MESSAGES category.
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{

	return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
}
106
/*
 * dngettext() -
 *	translate msgid1 or msgid2, chosen by n inside dcngettext(), in
 *	the given domain.  the category is fixed to LC_MESSAGES.
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{

	return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
}
117
118 /*
119 * dcngettext() -
120 * lookup internationalized message on database locale/category/domainname
121 * (like ja_JP.eucJP/LC_MESSAGES/domainname).
 * if n equals 1, the internationalized message is looked up for msgid1;
 * otherwise, it is looked up for msgid2.
124 * if the lookup fails, the function will return msgid1 or msgid2 as is.
125 *
126 * Even though the return type is "char *", caller should not rewrite the
127 * region pointed to by the return value (should be "const char *", but can't
128 * change it for compatibility with other implementations).
129 *
130 * by default (if domainname == NULL), domainname is taken from the value set
131 * by textdomain(). usually name of the application (like "ls") is used as
132 * domainname. category is usually LC_MESSAGES.
133 *
134 * the code reads in *.mo files generated by GNU gettext. *.mo is a host-
135 * endian encoded file. both endians are supported here, as the files are in
136 * /usr/share/locale! (or we should move those files into /usr/libdata)
137 */
138
/*
 * lookup_category() -
 *	map an LC_* category constant to its name as a string (the caller
 *	uses it as a pathname component of the catalog file).
 *	returns NULL for any category that is not listed in the table.
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int id;
		const char *name;
	} categories[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(categories) / sizeof(categories[0]); i++) {
		if (categories[i].id == category)
			return categories[i].name;
	}
	return NULL;
}
154
/*
 * append_candidate() -
 *	append one locale name, assembled from its parts, to the
 *	colon-separated list held in dst.  l is mandatory; t (territory),
 *	c (codeset) and m (modifier) are left out when NULL.  *used is the
 *	current length of the list and is advanced on success.
 *	returns 0 on success, -1 if the name does not fit in dst.
 */
static int
append_candidate(char *dst, size_t dstlen, size_t *used, const char *l,
    const char *t, const char *c, const char *m)
{
	int n;

	n = snprintf(dst + *used, dstlen - *used, "%s%s%s%s%s%s%s%s",
	    *used ? ":" : "", l,
	    t ? "_" : "", t ? t : "",
	    c ? "." : "", c ? c : "",
	    m ? "@" : "", m ? m : "");
	if (n < 0 || (size_t)n >= dstlen - *used)
		return -1;
	*used += (size_t)n;
	return 0;
}

/*
 * split_locale() -
 *	expand a locale name into a colon-separated list of names to try,
 *	from the most specific to the least specific.
 *
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * for example "ja_JP.eucJP" becomes "ja_JP.eucJP:ja_JP:ja".
 *
 * lname is returned unchanged when it is too long for the work buffer,
 * has an empty language part, has a codeset without a territory, or when
 * the expanded list would not fit in the result buffer.
 *
 * the list is built in a static buffer, so the returned string is
 * overwritten by the next call.
 */
static const char *
split_locale(const char *lname)
{
	static char result[BUFSIZ];
	char buf[BUFSIZ];
	char *l, *t, *c, *m;
	size_t len, used;

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		return lname;
	memcpy(buf, lname, len + 1);

	/* peel the optional components off from the right */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (*l == '\0')
		return lname;
	if (c && !t)
		return lname;

	result[0] = '\0';
	used = 0;

	/* names carrying the modifier come first */
	if (m) {
		if (t) {
			if (c && append_candidate(result, sizeof(result),
			    &used, l, t, c, m) < 0)
				return lname;
			if (append_candidate(result, sizeof(result), &used,
			    l, t, NULL, m) < 0)
				return lname;
		}
		if (append_candidate(result, sizeof(result), &used,
		    l, NULL, NULL, m) < 0)
			return lname;
	}
	if (t) {
		if (c && append_candidate(result, sizeof(result), &used,
		    l, t, c, NULL) < 0)
			return lname;
		/* language_territory: previously appended without being built */
		if (append_candidate(result, sizeof(result), &used,
		    l, t, NULL, NULL) < 0)
			return lname;
	}
	if (append_candidate(result, sizeof(result), &used,
	    l, NULL, NULL, NULL) < 0)
		return lname;

	return result;
}
219
220 static const char *
221 lookup_mofile(buf, len, dir, lpath, category, domainname)
222 char *buf;
223 size_t len;
224 const char *dir;
225 char *lpath; /* list of locales to be tried */
226 const char *category;
227 const char *domainname;
228 {
229 struct stat st;
230 char *p, *q;
231
232 q = lpath;
233 while (1) {
234 p = strsep(&q, ":");
235 if (!p)
236 break;
237 if (!*p)
238 continue;
239
240 /* don't mess with default locales */
241 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
242 return NULL;
243
244 /* validate pathname */
245 if (strchr(p, '/') || strchr(category, '/'))
246 continue;
247 #if 1 /*?*/
248 if (strchr(domainname, '/'))
249 continue;
250 #endif
251
252 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
253 category, domainname);
254 if (stat(buf, &st) < 0)
255 continue;
256 if ((st.st_mode & S_IFMT) != S_IFREG)
257 continue;
258
259 if (mapit(buf) == 0)
260 return buf;
261 }
262
263 return NULL;
264 }
265
266 static u_int32_t
267 flip(v, magic)
268 u_int32_t v;
269 u_int32_t magic;
270 {
271
272 if (magic == MO_MAGIC)
273 return v;
274 else if (magic == MO_MAGIC_SWAPPED) {
275 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
276 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
277 return v;
278 } else {
279 abort();
280 /*NOTREACHED*/
281 }
282 }
283
284 static int
285 validate(arg)
286 void *arg;
287 {
288 char *p;
289
290 p = (char *)arg;
291 if (p < (char *)mohandle.addr ||
292 p > (char *)mohandle.addr + mohandle.len)
293 return 0;
294 else
295 return 1;
296 }
297
298 int
299 mapit(path)
300 const char *path;
301 {
302 int fd;
303 struct stat st;
304 char *base;
305 u_int32_t magic, revision;
306 struct moentry *otable, *ttable;
307 struct moentry_h *p;
308 struct mo *mo;
309 size_t l;
310 int i;
311 char *v;
312
313 if (mohandle.addr && strcmp(path, mohandle.path) == 0)
314 return 0; /*already opened*/
315
316 unmapit();
317
318 #if 0
319 if (secure_path(path) != 0)
320 goto fail;
321 #endif
322 if (stat(path, &st) < 0)
323 goto fail;
324 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
325 goto fail;
326 fd = open(path, O_RDONLY);
327 if (fd < 0)
328 goto fail;
329 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
330 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
331 close(fd);
332 goto fail;
333 }
334 if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
335 flip(revision, magic) != MO_REVISION) {
336 close(fd);
337 goto fail;
338 }
339 mohandle.addr = mmap(NULL, st.st_size, PROT_READ, MAP_FILE | MAP_SHARED,
340 fd, (off_t)0);
341 if (!mohandle.addr) {
342 close(fd);
343 goto fail;
344 }
345 close(fd);
346 mohandle.len = st.st_size;
347 strlcpy(mohandle.path, path, sizeof(mohandle.path));
348
349 base = mohandle.addr;
350 mo = (struct mo *)mohandle.addr;
351
352 /* flip endian. do not flip magic number! */
353 mohandle.mo.mo_magic = mo->mo_magic;
354 mohandle.mo.mo_revision = flip(mo->mo_revision, magic);
355 mohandle.mo.mo_nstring = flip(mo->mo_nstring, magic);
356
357 /* validate otable/ttable */
358 otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
359 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
360 if (!validate(otable) || !validate(&otable[mohandle.mo.mo_nstring])) {
361 unmapit();
362 goto fail;
363 }
364 if (!validate(ttable) || !validate(&ttable[mohandle.mo.mo_nstring])) {
365 unmapit();
366 goto fail;
367 }
368
369 /* allocate [ot]table, and convert to normal pointer representation. */
370 l = sizeof(struct moentry_h) * mohandle.mo.mo_nstring;
371 mohandle.mo.mo_otable = (struct moentry_h *)malloc(l);
372 if (!mohandle.mo.mo_otable) {
373 unmapit();
374 goto fail;
375 }
376 mohandle.mo.mo_ttable = (struct moentry_h *)malloc(l);
377 if (!mohandle.mo.mo_ttable) {
378 unmapit();
379 goto fail;
380 }
381 p = mohandle.mo.mo_otable;
382 for (i = 0; i < mohandle.mo.mo_nstring; i++) {
383 p[i].len = flip(otable[i].len, magic);
384 p[i].off = base + flip(otable[i].off, magic);
385
386 if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
387 unmapit();
388 goto fail;
389 }
390 }
391 p = mohandle.mo.mo_ttable;
392 for (i = 0; i < mohandle.mo.mo_nstring; i++) {
393 p[i].len = flip(ttable[i].len, magic);
394 p[i].off = base + flip(ttable[i].off, magic);
395
396 if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
397 unmapit();
398 goto fail;
399 }
400 }
401
402 /* grab MIME-header and charset field */
403 mohandle.mo.mo_header = lookup("");
404 if (mohandle.mo.mo_header)
405 v = strstr(mohandle.mo.mo_header, "charset=");
406 else
407 v = NULL;
408 if (v) {
409 mohandle.mo.mo_charset = strdup(v + 8);
410 v = strchr(mohandle.mo.mo_charset, '\n');
411 if (v)
412 *v = '\0';
413 }
414
415 /*
416 * XXX check charset, reject it if we are unable to support the charset
417 * with the current locale.
418 * for example, if we are using euc-jp locale and we are looking at
419 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
420 * the *.mo file as we cannot support it.
421 */
422
423 return 0;
424
425 fail:
426 return -1;
427 }
428
429 static int
430 unmapit()
431 {
432
433 /* unmap if there's already mapped region */
434 if (mohandle.addr)
435 munmap(mohandle.addr, mohandle.len);
436 mohandle.addr = NULL;
437 mohandle.path[0] = '\0';
438 if (mohandle.mo.mo_otable)
439 free(mohandle.mo.mo_otable);
440 if (mohandle.mo.mo_ttable)
441 free(mohandle.mo.mo_ttable);
442 if (mohandle.mo.mo_charset)
443 free(mohandle.mo.mo_charset);
444 memset(&mohandle.mo, 0, sizeof(mohandle.mo));
445 return 0;
446 }
447
/*
 * lookup_hash() -
 *	placeholder for a lookup through the hash table of the *.mo file.
 *	not implemented: always returns NULL, and msgid is not looked at.
 *
 * XXX should try a hashed lookup here, but to do so, we need to
 * look inside the GPL'ed *.c and re-implement...
 */
static const char *
lookup_hash(const char *msgid)
{

	(void)msgid;
	return NULL;
}
459
460 static const char *
461 lookup_bsearch(msgid)
462 const char *msgid;
463 {
464 size_t l;
465 int top, bottom, middle, omiddle;
466 int n;
467
468 l = strlen(msgid);
469
470 top = 0;
471 bottom = mohandle.mo.mo_nstring;
472 omiddle = -1;
473 while (1) {
474 if (top > bottom)
475 return NULL;
476 middle = (top + bottom) / 2;
477 /* avoid possible infinite loop, when the data is not sorted */
478 if (omiddle == middle)
479 return NULL;
480 if (middle < 0 || middle >= mohandle.mo.mo_nstring)
481 return NULL;
482
483 n = strcmp(msgid, mohandle.mo.mo_otable[middle].off);
484 if (n == 0)
485 return (const char *)mohandle.mo.mo_ttable[middle].off;
486 else if (n < 0)
487 bottom = middle;
488 else
489 top = middle;
490 omiddle = middle;
491 }
492
493 return NULL;
494 }
495
/*
 * lookup() -
 *	find the translation of msgid in the current catalog.  the hashed
 *	lookup is tried first; the binary search is used when it returns
 *	NULL.  returns NULL if neither finds msgid.
 */
static const char *
lookup(const char *msgid)
{
	const char *hit = lookup_hash(msgid);

	return hit != NULL ? hit : lookup_bsearch(msgid);
}
508
509 char *
510 dcngettext(domainname, msgid1, msgid2, n, category)
511 const char *domainname;
512 const char *msgid1;
513 const char *msgid2;
514 unsigned long int n;
515 int category;
516 {
517 const char *msgid;
518 char path[PATH_MAX];
519 static char lpath[PATH_MAX];
520 static char olpath[PATH_MAX];
521 const char *locale;
522 const char *language;
523 const char *cname;
524 const char *v;
525
526 msgid = (n == 1) ? msgid1 : msgid2;
527
528 if (!domainname)
529 domainname = __domainname;
530 cname = lookup_category(category);
531 if (!domainname || !cname)
532 goto fail;
533
534 language = getenv("LANGUAGE");
535 locale = setlocale(LC_MESSAGES, NULL); /*XXX*/
536 if (locale)
537 locale = split_locale(locale);
538 if (language && locale) {
539 if (strlen(language) + strlen(locale) + 2 > sizeof(lpath))
540 goto fail;
541 snprintf(lpath, sizeof(lpath), "%s:%s", language, locale);
542 } else if (language) {
543 if (strlen(language) + 1 > sizeof(lpath))
544 goto fail;
545 strlcpy(lpath, language, sizeof(lpath));
546 } else if (locale) {
547 if (strlen(locale) + 1 > sizeof(lpath))
548 goto fail;
549 strlcpy(lpath, locale, sizeof(lpath));
550 } else
551 goto fail;
552
553 /* don't bother looking it up if the values are the same */
554 if (strcmp(lpath, olpath) == 0)
555 goto found;
556
557 strlcpy(olpath, lpath, sizeof(olpath));
558
559 /* try to find appropriate file, from $LANGUAGE */
560 if (lookup_mofile(path, sizeof(path), __domainpath, lpath, cname,
561 domainname) == NULL)
562 goto fail;
563
564 found:
565 v = lookup(msgid);
566 if (v) {
567 /*
568 * XXX call iconv() here, if translated text is encoded
569 * differently from currently-selected encoding (locale).
570 * look at Content-type header in *.mo file, in string obtained
571 * by gettext("").
572 */
573
574 /*
575 * Given the amount of printf-format security issues, it may
576 * be a good idea to validate if the original msgid and the
577 * translated message format string carry the same printf-like
578 * format identifiers.
579 */
580
581 msgid = v;
582 }
583
584 fail:
585 /* LINTED const cast */
586 return (char *)msgid;
587 }
588