gettext.c revision 1.19 1 /* $NetBSD: gettext.c,v 1.19 2004/09/23 16:44:26 tshiozak Exp $ */
2
3 /*-
4 * Copyright (c) 2000, 2001 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $
29 */
30
31 #include <sys/cdefs.h>
32 __RCSID("$NetBSD: gettext.c,v 1.19 2004/09/23 16:44:26 tshiozak Exp $");
33
34 #include <sys/param.h>
35 #include <sys/stat.h>
36 #include <sys/mman.h>
37 #include <sys/uio.h>
38
39 #include <assert.h>
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #if 0
46 #include <util.h>
47 #endif
48 #include <libintl.h>
49 #include <locale.h>
50 #include "libintl_local.h"
51 #include "pathnames.h"
52
53 static const char *lookup_category __P((int));
54 static const char *split_locale __P((const char *));
55 static const char *lookup_mofile __P((char *, size_t, const char *,
56 const char *, const char *, const char *, struct domainbinding *));
57 static u_int32_t flip __P((u_int32_t, u_int32_t));
58 static int validate __P((void *, struct mohandle *));
59 static int mapit __P((const char *, struct domainbinding *));
60 static int unmapit __P((struct domainbinding *));
61 static const char *lookup_hash __P((const char *, struct domainbinding *));
62 static const char *lookup_bsearch __P((const char *, struct domainbinding *));
63 static const char *lookup __P((const char *, struct domainbinding *));
64 static const char *get_lang_env __P((const char *));
65
/*
 * Shortcut entry points.  Each of them only fills in the arguments its
 * caller did not supply and forwards to dcngettext(), where the actual
 * implementation lives.
 */

/*
 * gettext() - look up msgid with no explicit domain (NULL is passed on,
 * so dcngettext() picks the domain) in the LC_MESSAGES category.
 * No plural form is involved: msgid2 is NULL and n is 1.
 */
char *
gettext(const char *msgid)
{
	const char *domain = NULL;

	return dcngettext(domain, msgid, NULL, 1UL, LC_MESSAGES);
}
76
/*
 * dgettext() - look up msgid in the given domain, using the LC_MESSAGES
 * category.  Forwards to dcngettext() with no plural form (msgid2 is
 * NULL, n is 1).
 */
char *
dgettext(const char *domainname, const char *msgid)
{
	const int category = LC_MESSAGES;

	return dcngettext(domainname, msgid, NULL, 1UL, category);
}
85
/*
 * dcgettext() - look up msgid in the given domain and locale category.
 * Forwards to dcngettext() with no plural form (msgid2 is NULL, n is 1).
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{
	const unsigned long int singular = 1UL;

	return dcngettext(domainname, msgid, NULL, singular, category);
}
95
/*
 * ngettext() - plural-aware lookup with no explicit domain (NULL is
 * passed on, so dcngettext() picks the domain) in the LC_MESSAGES
 * category.  msgid1, msgid2 and n are handed to dcngettext() unchanged.
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{
	const char *domain = NULL;

	return dcngettext(domain, msgid1, msgid2, n, LC_MESSAGES);
}
105
/*
 * dngettext() - plural-aware lookup in the given domain, using the
 * LC_MESSAGES category.  msgid1, msgid2 and n are handed to dcngettext()
 * unchanged.
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{
	const int category = LC_MESSAGES;

	return dcngettext(domainname, msgid1, msgid2, n, category);
}
116
/*
 * dcngettext() -
 * look up an internationalized message in the database
 * locale/category/domainname (like ja_JP.eucJP/LC_MESSAGES/domainname).
 * if n equals 1, the internationalized message is looked up for msgid1.
 * otherwise, the message is looked up for msgid2.
 * if the lookup fails, the function returns msgid1 or msgid2 as is.
 *
 * Even though the return type is "char *", the caller should not rewrite the
 * region pointed to by the return value (it should be "const char *", but we
 * can't change it, for compatibility with other implementations).
 *
 * by default (if domainname == NULL), domainname is taken from the value set
 * by textdomain().  usually the name of the application (like "ls") is used
 * as domainname.  category is usually LC_MESSAGES.
 *
 * the code reads in *.mo files generated by GNU gettext.  a *.mo file is
 * encoded in the byte order of the host that generated it.  both byte orders
 * are supported here, as the files are in /usr/share/locale!  (or we should
 * move those files into /usr/libdata)
 */
137
/*
 * lookup_category() - translate an LC_* category constant into its name.
 *
 * Returns the name as a string literal ("LC_MESSAGES" for LC_MESSAGES and
 * so on) for LC_COLLATE, LC_CTYPE, LC_MONETARY, LC_NUMERIC, LC_TIME and
 * LC_MESSAGES.  Any other value, LC_ALL included, yields NULL.
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int id;
		const char *name;
	} known[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (known[i].id == category)
			return known[i].name;
	}
	return NULL;
}
153
/*
 * split_locale_add() - append one candidate to the colon-separated list
 * being built in dst.
 *
 * dst/dstlen:	the output buffer and its total size.
 * used:	number of bytes already stored in dst (excluding the NUL);
 *		updated on success.
 * cand:	candidate locale name to append.
 * more:	non-zero if another candidate will follow, in which case a
 *		':' separator is appended as well.
 *
 * Returns 0 on success, or -1 (leaving *used untouched) when the
 * candidate, its separator and the terminating NUL do not all fit.
 */
static int
split_locale_add(char *dst, size_t dstlen, size_t *used, const char *cand,
    int more)
{
	size_t n = strlen(cand);
	size_t need = n + (more ? 1 : 0);

	/* strictly less than the free space: one byte is kept for the NUL */
	if (need >= dstlen - *used)
		return -1;

	memcpy(dst + *used, cand, n);
	*used += n;
	if (more)
		dst[(*used)++] = ':';
	dst[*used] = '\0';
	return 0;
}

/*
 * split_locale() - expand a locale name into the list of names to try.
 *
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * The name is split at the last '@', then the last '.', then the last '_'
 * and the candidates are emitted from most to least specific, separated
 * by ':'.  For "l_t.c@m" the result is
 *	l_t.c@m:l_t@m:l@m:l_t.c:l_t:l
 * with the entries that need a missing component left out.
 *
 * lname is returned unchanged when it cannot be processed: it is too long
 * for the work buffer, the language part is empty, a codeset is given
 * without a territory, or the expanded list would not fit in the result
 * buffer.  The last case used to truncate the list silently, which could
 * leave a cut-off, bogus locale name as a candidate.
 *
 * Otherwise the return value points to a static buffer that is
 * overwritten by the next call.
 */
static const char *
split_locale(const char *lname)
{
	char buf[BUFSIZ], tmp[BUFSIZ];
	char *l, *t, *c, *m;
	static char result[BUFSIZ];
	size_t used = 0;
	size_t n;

	result[0] = '\0';

	n = strlen(lname);
	if (n + 1 > sizeof(buf))
		return lname;
	memcpy(buf, lname, n + 1);

	/* peel the components off from the right-hand side */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (*l == '\0')
		return lname;
	if (c && !t)
		return lname;

	/*
	 * Every candidate is no longer than lname itself, so it always fits
	 * in tmp; only the accumulated list can overflow.
	 */
	if (m) {
		if (t) {
			if (c) {
				snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s",
				    l, t, c, m);
				if (split_locale_add(result, sizeof(result),
				    &used, tmp, 1) != 0)
					return lname;
			}
			snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m);
			if (split_locale_add(result, sizeof(result), &used,
			    tmp, 1) != 0)
				return lname;
		}
		snprintf(tmp, sizeof(tmp), "%s@%s", l, m);
		if (split_locale_add(result, sizeof(result), &used,
		    tmp, 1) != 0)
			return lname;
	}
	if (t) {
		if (c) {
			snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c);
			if (split_locale_add(result, sizeof(result), &used,
			    tmp, 1) != 0)
				return lname;
		}
		snprintf(tmp, sizeof(tmp), "%s_%s", l, t);
		if (split_locale_add(result, sizeof(result), &used,
		    tmp, 1) != 0)
			return lname;
	}
	/* the bare language always comes last, without a separator */
	if (split_locale_add(result, sizeof(result), &used, l, 0) != 0)
		return lname;

	return result;
}
219
220 static const char *
221 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
222 char *buf;
223 size_t len;
224 const char *dir;
225 const char *lpath; /* list of locales to be tried */
226 const char *category;
227 const char *domainname;
228 struct domainbinding *db;
229 {
230 struct stat st;
231 char *p, *q;
232 char lpath_tmp[BUFSIZ];
233
234 strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp));
235 q = lpath_tmp;
236 /* CONSTCOND */
237 while (1) {
238 p = strsep(&q, ":");
239 if (!p)
240 break;
241 if (!*p)
242 continue;
243
244 /* don't mess with default locales */
245 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
246 return NULL;
247
248 /* validate pathname */
249 if (strchr(p, '/') || strchr(category, '/'))
250 continue;
251 #if 1 /*?*/
252 if (strchr(domainname, '/'))
253 continue;
254 #endif
255
256 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
257 category, domainname);
258 if (stat(buf, &st) < 0)
259 continue;
260 if ((st.st_mode & S_IFMT) != S_IFREG)
261 continue;
262
263 if (mapit(buf, db) == 0)
264 return buf;
265 }
266
267 return NULL;
268 }
269
270 static u_int32_t
271 flip(v, magic)
272 u_int32_t v;
273 u_int32_t magic;
274 {
275
276 if (magic == MO_MAGIC)
277 return v;
278 else if (magic == MO_MAGIC_SWAPPED) {
279 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
280 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
281 return v;
282 } else {
283 abort();
284 /*NOTREACHED*/
285 }
286 }
287
288 static int
289 validate(arg, mohandle)
290 void *arg;
291 struct mohandle *mohandle;
292 {
293 char *p;
294
295 p = (char *)arg;
296 if (p < (char *)mohandle->addr ||
297 p > (char *)mohandle->addr + mohandle->len)
298 return 0;
299 else
300 return 1;
301 }
302
303 int
304 mapit(path, db)
305 const char *path;
306 struct domainbinding *db;
307 {
308 int fd;
309 struct stat st;
310 char *base;
311 u_int32_t magic, revision;
312 struct moentry *otable, *ttable;
313 const u_int32_t *htable;
314 struct moentry_h *p;
315 struct mo *mo;
316 size_t l;
317 int i;
318 char *v;
319 struct mohandle *mohandle = &db->mohandle;
320
321 if (mohandle->addr && mohandle->addr != MAP_FAILED &&
322 mohandle->mo.mo_magic)
323 return 0; /*already opened*/
324
325 unmapit(db);
326
327 #if 0
328 if (secure_path(path) != 0)
329 goto fail;
330 #endif
331 if (stat(path, &st) < 0)
332 goto fail;
333 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
334 goto fail;
335 fd = open(path, O_RDONLY);
336 if (fd < 0)
337 goto fail;
338 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
339 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
340 close(fd);
341 goto fail;
342 }
343 if (read(fd, &revision, sizeof(revision)) != sizeof(revision)) {
344 close(fd);
345 goto fail;
346 }
347 switch (flip(revision, magic)) {
348 case MO_MAKE_REV(0, 0):
349 #if 0
350 case MO_MAKE_REV(0, 1):
351 case MO_MAKE_REV(1, 1):
352 #endif
353 break;
354 default:
355 close(fd);
356 goto fail;
357 }
358 mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
359 MAP_FILE | MAP_SHARED, fd, (off_t)0);
360 if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
361 close(fd);
362 goto fail;
363 }
364 close(fd);
365 mohandle->len = (size_t)st.st_size;
366
367 base = mohandle->addr;
368 mo = (struct mo *)mohandle->addr;
369
370 /* flip endian. do not flip magic number! */
371 mohandle->mo.mo_magic = mo->mo_magic;
372 mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
373 mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
374 mohandle->mo.mo_hsize = flip(mo->mo_hsize, magic);
375
376 /* validate otable/ttable */
377 /* LINTED: ignore the alignment problem. */
378 otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
379 /* LINTED: ignore the alignment problem. */
380 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
381 if (!validate(otable, mohandle) ||
382 !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
383 unmapit(db);
384 goto fail;
385 }
386 if (!validate(ttable, mohandle) ||
387 !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
388 unmapit(db);
389 goto fail;
390 }
391
392 /* allocate [ot]table, and convert to normal pointer representation. */
393 l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
394 mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
395 if (!mohandle->mo.mo_otable) {
396 unmapit(db);
397 goto fail;
398 }
399 mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
400 if (!mohandle->mo.mo_ttable) {
401 unmapit(db);
402 goto fail;
403 }
404 p = mohandle->mo.mo_otable;
405 for (i = 0; i < mohandle->mo.mo_nstring; i++) {
406 p[i].len = flip(otable[i].len, magic);
407 p[i].off = base + flip(otable[i].off, magic);
408
409 if (!validate(p[i].off, mohandle) ||
410 !validate(p[i].off + p[i].len + 1, mohandle)) {
411 unmapit(db);
412 goto fail;
413 }
414 }
415 p = mohandle->mo.mo_ttable;
416 for (i = 0; i < mohandle->mo.mo_nstring; i++) {
417 p[i].len = flip(ttable[i].len, magic);
418 p[i].off = base + flip(ttable[i].off, magic);
419
420 if (!validate(p[i].off, mohandle) ||
421 !validate(p[i].off + p[i].len + 1, mohandle)) {
422 unmapit(db);
423 goto fail;
424 }
425 }
426 /* allocate htable, and convert it to the host order. */
427 if (mohandle->mo.mo_hsize > 2) {
428 l = sizeof(u_int32_t) * mohandle->mo.mo_hsize;
429 mohandle->mo.mo_htable = (u_int32_t *)malloc(l);
430 if (!mohandle->mo.mo_htable) {
431 unmapit(db);
432 goto fail;
433 }
434 /* LINTED: ignore the alignment problem. */
435 htable = (const u_int32_t *)(base+flip(mo->mo_hoffset, magic));
436 for (i=0; i < mohandle->mo.mo_hsize; i++) {
437 mohandle->mo.mo_htable[i] = flip(htable[i], magic);
438 if (mohandle->mo.mo_htable[i] >=
439 mohandle->mo.mo_nstring+1) {
440 /* illegal string number. */
441 unmapit(db);
442 goto fail;
443 }
444 }
445 }
446 /* grab MIME-header and charset field */
447 mohandle->mo.mo_header = lookup("", db);
448 if (mohandle->mo.mo_header)
449 v = strstr(mohandle->mo.mo_header, "charset=");
450 else
451 v = NULL;
452 if (v) {
453 mohandle->mo.mo_charset = strdup(v + 8);
454 if (!mohandle->mo.mo_charset)
455 goto fail;
456 v = strchr(mohandle->mo.mo_charset, '\n');
457 if (v)
458 *v = '\0';
459 }
460
461 /*
462 * XXX check charset, reject it if we are unable to support the charset
463 * with the current locale.
464 * for example, if we are using euc-jp locale and we are looking at
465 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
466 * the *.mo file as we cannot support it.
467 */
468
469 return 0;
470
471 fail:
472 return -1;
473 }
474
475 static int
476 unmapit(db)
477 struct domainbinding *db;
478 {
479 struct mohandle *mohandle = &db->mohandle;
480
481 /* unmap if there's already mapped region */
482 if (mohandle->addr && mohandle->addr != MAP_FAILED)
483 munmap(mohandle->addr, mohandle->len);
484 mohandle->addr = NULL;
485 if (mohandle->mo.mo_otable)
486 free(mohandle->mo.mo_otable);
487 if (mohandle->mo.mo_ttable)
488 free(mohandle->mo.mo_ttable);
489 if (mohandle->mo.mo_charset)
490 free(mohandle->mo.mo_charset);
491 if (mohandle->mo.mo_htable)
492 free(mohandle->mo.mo_htable);
493 memset(&mohandle->mo, 0, sizeof(mohandle->mo));
494 return 0;
495 }
496
/*
 * calc_collision_step() - probe increment used when a hash slot is
 * already taken.
 *
 * The result is in the range 1 .. hashsize - 2.  hashsize must be greater
 * than 2 (asserted in diagnostic builds), which also keeps the modulus
 * from being zero.
 */
static __inline u_int32_t
calc_collision_step(u_int32_t hashval, u_int32_t hashsize)
{
	u_int32_t modulus;

	_DIAGASSERT(hashsize>2);
	modulus = hashsize - 2;
	return 1 + hashval % modulus;
}
506
/*
 * calc_next_index() - advance a hash table index by step, wrapping
 * around at hashsize.
 *
 * Computes (curidx + step) modulo hashsize for curidx < hashsize and
 * step <= hashsize, without forming an intermediate sum that is compared
 * against hashsize.
 */
static __inline u_int32_t
calc_next_index(u_int32_t curidx, u_int32_t hashsize, u_int32_t step)
{
	u_int32_t next = curidx + step;

	/* stepping would reach or pass the end of the table: wrap */
	if (curidx >= hashsize - step)
		next -= hashsize;
	return next;
}
515
516 /* ARGSUSED */
517 static const char *
518 lookup_hash(msgid, db)
519 const char *msgid;
520 struct domainbinding *db;
521 {
522 struct mohandle *mohandle = &db->mohandle;
523 u_int32_t idx, hashval, step, strno;
524 size_t len;
525
526 if (mohandle->mo.mo_hsize <= 2 || mohandle->mo.mo_htable == NULL)
527 return NULL;
528
529 hashval = __intl_string_hash(msgid);
530 step = calc_collision_step(hashval, mohandle->mo.mo_hsize);
531 idx = hashval % mohandle->mo.mo_hsize;
532 len = strlen(msgid);
533 while (/*CONSTCOND*/1) {
534 strno = mohandle->mo.mo_htable[idx];
535 if (strno == 0) {
536 /* unexpected miss */
537 return NULL;
538 }
539 strno--;
540 if (len <= mohandle->mo.mo_otable[strno].len &&
541 !strcmp(msgid, mohandle->mo.mo_otable[strno].off)) {
542 /* hit */
543 return mohandle->mo.mo_ttable[strno].off;
544 }
545 idx = calc_next_index(idx, mohandle->mo.mo_hsize, step);
546 }
547 /*NOTREACHED*/
548 }
549
550 static const char *
551 lookup_bsearch(msgid, db)
552 const char *msgid;
553 struct domainbinding *db;
554 {
555 int top, bottom, middle, omiddle;
556 int n;
557 struct mohandle *mohandle = &db->mohandle;
558
559 top = 0;
560 bottom = mohandle->mo.mo_nstring;
561 omiddle = -1;
562 /* CONSTCOND */
563 while (1) {
564 if (top > bottom)
565 break;
566 middle = (top + bottom) / 2;
567 /* avoid possible infinite loop, when the data is not sorted */
568 if (omiddle == middle)
569 break;
570 if (middle < 0 || middle >= mohandle->mo.mo_nstring)
571 break;
572
573 n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
574 if (n == 0)
575 return (const char *)mohandle->mo.mo_ttable[middle].off;
576 else if (n < 0)
577 bottom = middle;
578 else
579 top = middle;
580 omiddle = middle;
581 }
582
583 return NULL;
584 }
585
/*
 * lookup() - find the translation of msgid in the catalog bound to db.
 *
 * The hash table is tried first; when it yields nothing, the binary
 * search over the string table is used.  Returns the translated string,
 * or NULL when neither method finds msgid.
 */
static const char *
lookup(const char *msgid, struct domainbinding *db)
{
	const char *hit = lookup_hash(msgid, db);

	return (hit != NULL) ? hit : lookup_bsearch(msgid, db);
}
599
/*
 * get_lang_env() - determine the list of locales to try from the
 * environment.
 *
 * category_name is the name of the category's own variable (e.g.
 * "LC_MESSAGES").
 *
 * LANGUAGE takes precedence and is returned as is, since it is already a
 * colon-separated list.  Otherwise the first of LC_ALL, the category
 * variable and LANG that is set is expanded by split_locale().
 *
 * A variable that is set to the empty string is treated as not set, so
 * that e.g. LC_ALL="" does not hide a usable LANG.
 *
 * Returns NULL when none of the variables provides a value.
 */
static const char *
get_lang_env(const char *category_name)
{
	const char *lang;

	/* 1. see LANGUAGE variable first. */
	lang = getenv("LANGUAGE");
	if (lang != NULL && *lang != '\0')
		return lang;

	/* 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. */
	lang = getenv("LC_ALL");
	if (lang == NULL || *lang == '\0')
		lang = getenv(category_name);
	if (lang == NULL || *lang == '\0')
		lang = getenv("LANG");

	if (lang == NULL || *lang == '\0')
		return NULL;	/* error */

	return split_locale(lang);
}
622
623 char *
624 dcngettext(domainname, msgid1, msgid2, n, category)
625 const char *domainname;
626 const char *msgid1;
627 const char *msgid2;
628 unsigned long int n;
629 int category;
630 {
631 const char *msgid;
632 char path[PATH_MAX];
633 const char *lpath;
634 static char olpath[PATH_MAX];
635 const char *cname = NULL;
636 const char *v;
637 static char *ocname = NULL;
638 static char *odomainname = NULL;
639 struct domainbinding *db;
640
641 msgid = (n == 1) ? msgid1 : msgid2;
642 if (msgid == NULL)
643 return NULL;
644
645 if (!domainname)
646 domainname = __current_domainname;
647 cname = lookup_category(category);
648 if (!domainname || !cname)
649 goto fail;
650
651 lpath = get_lang_env(cname);
652 if (!lpath)
653 goto fail;
654
655 for (db = __bindings; db; db = db->next)
656 if (strcmp(db->domainname, domainname) == 0)
657 break;
658 if (!db) {
659 if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
660 goto fail;
661 db = __bindings;
662 }
663
664 /* resolve relative path */
665 /* XXX not necessary? */
666 if (db->path[0] != '/') {
667 char buf[PATH_MAX];
668
669 if (getcwd(buf, sizeof(buf)) == 0)
670 goto fail;
671 if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
672 goto fail;
673 if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf))
674 goto fail;
675 strlcpy(db->path, buf, sizeof(db->path));
676 }
677
678 /* don't bother looking it up if the values are the same */
679 if (odomainname && strcmp(domainname, odomainname) == 0 &&
680 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
681 db->mohandle.mo.mo_magic)
682 goto found;
683
684 /* try to find appropriate file, from $LANGUAGE */
685 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
686 domainname, db) == NULL)
687 goto fail;
688
689 if (odomainname)
690 free(odomainname);
691 if (ocname)
692 free(ocname);
693 odomainname = strdup(domainname);
694 ocname = strdup(cname);
695 if (!odomainname || !ocname) {
696 if (odomainname)
697 free(odomainname);
698 if (ocname)
699 free(ocname);
700 odomainname = ocname = NULL;
701 }
702 else
703 strlcpy(olpath, lpath, sizeof(olpath));
704
705 found:
706 v = lookup(msgid, db);
707 if (v) {
708 /*
709 * convert the translated message's encoding.
710 *
711 * special case:
712 * a result of gettext("") shouldn't need any conversion.
713 */
714 if (msgid[0])
715 v = __gettext_iconv(v, db);
716
717 /*
718 * Given the amount of printf-format security issues, it may
719 * be a good idea to validate if the original msgid and the
720 * translated message format string carry the same printf-like
721 * format identifiers.
722 */
723
724 msgid = v;
725 }
726
727 fail:
728 return (char *)__UNCONST(msgid);
729 }
730