/*	$NetBSD: gettext.c,v 1.18 2004/01/18 08:40:40 yamt Exp $	*/

/*-
 * Copyright (c) 2000, 2001 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $
 */
30
31 #include <sys/cdefs.h>
32 __RCSID("$NetBSD: gettext.c,v 1.18 2004/01/18 08:40:40 yamt Exp $");
33
34 #include <sys/param.h>
35 #include <sys/stat.h>
36 #include <sys/mman.h>
37 #include <sys/uio.h>
38
39 #include <fcntl.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <unistd.h>
43 #include <string.h>
44 #if 0
45 #include <util.h>
46 #endif
47 #include <libintl.h>
48 #include <locale.h>
49 #include "libintl_local.h"
50 #include "pathnames.h"
51
/*
 * forward declarations of the file-local helpers, in definition order.
 */
static const char *lookup_category(int);
static const char *split_locale(const char *);
static const char *lookup_mofile(char *, size_t, const char *, const char *,
    const char *, const char *, struct domainbinding *);
static u_int32_t flip(u_int32_t, u_int32_t);
static int validate(void *, struct mohandle *);
static int mapit(const char *, struct domainbinding *);
static int unmapit(struct domainbinding *);
static const char *lookup_hash(const char *, struct domainbinding *);
static const char *lookup_bsearch(const char *, struct domainbinding *);
static const char *lookup(const char *, struct domainbinding *);
static const char *get_lang_env(const char *);
64
65 /*
66 * shortcut functions. the main implementation resides in dcngettext().
67 */
/*
 * gettext() -
 *	translate msgid in the default text domain (domainname == NULL)
 *	for the LC_MESSAGES category.  all of the work is done by
 *	dcngettext(); n is fixed to 1 so that msgid itself is looked up.
 */
char *
gettext(const char *msgid)
{

	return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
}
75
/*
 * dgettext() -
 *	like gettext(), but the message is looked up in the text domain
 *	given by domainname.  forwards to dcngettext() with n == 1 and the
 *	LC_MESSAGES category.
 */
char *
dgettext(const char *domainname, const char *msgid)
{

	return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
}
84
/*
 * dcgettext() -
 *	like dgettext(), but the locale category is chosen by the caller
 *	instead of being fixed to LC_MESSAGES.  forwards to dcngettext()
 *	with n == 1.
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{

	return dcngettext(domainname, msgid, NULL, 1UL, category);
}
94
/*
 * ngettext() -
 *	plural-aware variant of gettext(): msgid1/msgid2/n are handed to
 *	dcngettext() unchanged, using the default text domain and the
 *	LC_MESSAGES category.
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{

	return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
}
104
/*
 * dngettext() -
 *	plural-aware variant of dgettext(): msgid1/msgid2/n are handed to
 *	dcngettext() unchanged, using the given text domain and the
 *	LC_MESSAGES category.
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{

	return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
}
115
/*
 * dcngettext() -
 *	look up an internationalized message in the database
 *	locale/category/domainname
 *	(like ja_JP.eucJP/LC_MESSAGES/domainname).
 *	if n equals 1, the internationalized message is looked up for msgid1.
 *	otherwise, the message is looked up for msgid2.
 *	if the lookup fails, the function returns msgid1 or msgid2 as is.
 *
 * Even though the return type is "char *", the caller should not rewrite the
 * region pointed to by the return value (it should be "const char *", but we
 * can't change it, for compatibility with other implementations).
 *
 * by default (if domainname == NULL), domainname is taken from the value set
 * by textdomain().  usually the name of the application (like "ls") is used
 * as domainname.  category is usually LC_MESSAGES.
 *
 * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
 * endian encoded file.  both endians are supported here, as the files are in
 * /usr/share/locale! (or we should move those files into /usr/libdata)
 */
136
/*
 * lookup_category() -
 *	map a locale category constant (LC_xxx) to its name as a string.
 *	the name is what gets used as the category component of the
 *	catalog pathname.  returns NULL for any value that is not one of
 *	the six categories listed below.
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int		value;
		const char	*name;
	} categories[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(categories) / sizeof(categories[0]); i++) {
		if (categories[i].value == category)
			return categories[i].name;
	}
	return NULL;
}
152
/*
 * append_locale() -
 *	append one candidate "l[_t][.c][@m]" followed by sep to the list
 *	being built in dst.  NULL components are left out.  *used is the
 *	number of bytes already in dst and is advanced on success.
 *	returns 0 on success, -1 if the candidate does not fit completely
 *	(dst is then not guaranteed to hold a useful list).
 */
static int
append_locale(char *dst, size_t dstlen, size_t *used, const char *l,
    const char *t, const char *c, const char *m, const char *sep)
{
	int n;

	n = snprintf(dst + *used, dstlen - *used, "%s%s%s%s%s%s%s%s",
	    l,
	    t ? "_" : "", t ? t : "",
	    c ? "." : "", c ? c : "",
	    m ? "@" : "", m ? m : "",
	    sep);
	/* snprintf reports the untruncated length; >= room means it was cut */
	if (n < 0 || (size_t)n >= dstlen - *used)
		return -1;
	*used += (size_t)n;
	return 0;
}

/*
 * split_locale() -
 *	expand a locale name in XPG syntax,
 *		language[_territory[.codeset]][@modifier]
 *	into a ':'-separated list of names to try, most specific first, e.g.
 *	"ja_JP.eucJP@mod" becomes
 *	"ja_JP.eucJP@mod:ja_JP@mod:ja@mod:ja_JP.eucJP:ja_JP:ja".
 *
 *	lname is returned unchanged when it cannot be expanded: it is too
 *	long for the work buffer, the language part is empty, a codeset is
 *	given without a territory, or the expanded list would not fit into
 *	the result buffer (a truncated list could end in a chopped-off
 *	locale name, so it is never handed back).
 *
 *	otherwise the result lives in a static buffer that is overwritten
 *	by the next call.
 */
static const char *
split_locale(const char *lname)
{
	char buf[BUFSIZ];
	char *l, *t, *c, *m;
	static char result[BUFSIZ];
	size_t len, used = 0;

	memset(result, 0, sizeof(result));

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		goto fail;
	memcpy(buf, lname, len + 1);

	/* peel the components off from the right: @modifier, .codeset, _terr */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (*l == '\0')
		goto fail;
	if (c && !t)
		goto fail;

	/* names carrying the modifier come first */
	if (m) {
		if (t) {
			if (c && append_locale(result, sizeof(result), &used,
			    l, t, c, m, ":") != 0)
				goto fail;
			if (append_locale(result, sizeof(result), &used,
			    l, t, NULL, m, ":") != 0)
				goto fail;
		}
		if (append_locale(result, sizeof(result), &used,
		    l, NULL, NULL, m, ":") != 0)
			goto fail;
	}
	if (t) {
		if (c && append_locale(result, sizeof(result), &used,
		    l, t, c, NULL, ":") != 0)
			goto fail;
		if (append_locale(result, sizeof(result), &used,
		    l, t, NULL, NULL, ":") != 0)
			goto fail;
	}
	/* the bare language is always the last resort */
	if (append_locale(result, sizeof(result), &used,
	    l, NULL, NULL, NULL, "") != 0)
		goto fail;

	return result;

fail:
	return lname;
}
218
219 static const char *
220 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
221 char *buf;
222 size_t len;
223 const char *dir;
224 const char *lpath; /* list of locales to be tried */
225 const char *category;
226 const char *domainname;
227 struct domainbinding *db;
228 {
229 struct stat st;
230 char *p, *q;
231 char lpath_tmp[BUFSIZ];
232
233 strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp));
234 q = lpath_tmp;
235 /* CONSTCOND */
236 while (1) {
237 p = strsep(&q, ":");
238 if (!p)
239 break;
240 if (!*p)
241 continue;
242
243 /* don't mess with default locales */
244 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
245 return NULL;
246
247 /* validate pathname */
248 if (strchr(p, '/') || strchr(category, '/'))
249 continue;
250 #if 1 /*?*/
251 if (strchr(domainname, '/'))
252 continue;
253 #endif
254
255 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
256 category, domainname);
257 if (stat(buf, &st) < 0)
258 continue;
259 if ((st.st_mode & S_IFMT) != S_IFREG)
260 continue;
261
262 if (mapit(buf, db) == 0)
263 return buf;
264 }
265
266 return NULL;
267 }
268
269 static u_int32_t
270 flip(v, magic)
271 u_int32_t v;
272 u_int32_t magic;
273 {
274
275 if (magic == MO_MAGIC)
276 return v;
277 else if (magic == MO_MAGIC_SWAPPED) {
278 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
279 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
280 return v;
281 } else {
282 abort();
283 /*NOTREACHED*/
284 }
285 }
286
287 static int
288 validate(arg, mohandle)
289 void *arg;
290 struct mohandle *mohandle;
291 {
292 char *p;
293
294 p = (char *)arg;
295 if (p < (char *)mohandle->addr ||
296 p > (char *)mohandle->addr + mohandle->len)
297 return 0;
298 else
299 return 1;
300 }
301
302 int
303 mapit(path, db)
304 const char *path;
305 struct domainbinding *db;
306 {
307 int fd;
308 struct stat st;
309 char *base;
310 u_int32_t magic, revision;
311 struct moentry *otable, *ttable;
312 struct moentry_h *p;
313 struct mo *mo;
314 size_t l;
315 int i;
316 char *v;
317 struct mohandle *mohandle = &db->mohandle;
318
319 if (mohandle->addr && mohandle->addr != MAP_FAILED &&
320 mohandle->mo.mo_magic)
321 return 0; /*already opened*/
322
323 unmapit(db);
324
325 #if 0
326 if (secure_path(path) != 0)
327 goto fail;
328 #endif
329 if (stat(path, &st) < 0)
330 goto fail;
331 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
332 goto fail;
333 fd = open(path, O_RDONLY);
334 if (fd < 0)
335 goto fail;
336 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
337 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
338 close(fd);
339 goto fail;
340 }
341 if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
342 flip(revision, magic) != MO_REVISION) {
343 close(fd);
344 goto fail;
345 }
346 mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
347 MAP_FILE | MAP_SHARED, fd, (off_t)0);
348 if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
349 close(fd);
350 goto fail;
351 }
352 close(fd);
353 mohandle->len = (size_t)st.st_size;
354
355 base = mohandle->addr;
356 mo = (struct mo *)mohandle->addr;
357
358 /* flip endian. do not flip magic number! */
359 mohandle->mo.mo_magic = mo->mo_magic;
360 mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
361 mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
362
363 /* validate otable/ttable */
364 otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
365 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
366 if (!validate(otable, mohandle) ||
367 !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
368 unmapit(db);
369 goto fail;
370 }
371 if (!validate(ttable, mohandle) ||
372 !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
373 unmapit(db);
374 goto fail;
375 }
376
377 /* allocate [ot]table, and convert to normal pointer representation. */
378 l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
379 mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
380 if (!mohandle->mo.mo_otable) {
381 unmapit(db);
382 goto fail;
383 }
384 mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
385 if (!mohandle->mo.mo_ttable) {
386 unmapit(db);
387 goto fail;
388 }
389 p = mohandle->mo.mo_otable;
390 for (i = 0; i < mohandle->mo.mo_nstring; i++) {
391 p[i].len = flip(otable[i].len, magic);
392 p[i].off = base + flip(otable[i].off, magic);
393
394 if (!validate(p[i].off, mohandle) ||
395 !validate(p[i].off + p[i].len + 1, mohandle)) {
396 unmapit(db);
397 goto fail;
398 }
399 }
400 p = mohandle->mo.mo_ttable;
401 for (i = 0; i < mohandle->mo.mo_nstring; i++) {
402 p[i].len = flip(ttable[i].len, magic);
403 p[i].off = base + flip(ttable[i].off, magic);
404
405 if (!validate(p[i].off, mohandle) ||
406 !validate(p[i].off + p[i].len + 1, mohandle)) {
407 unmapit(db);
408 goto fail;
409 }
410 }
411
412 /* grab MIME-header and charset field */
413 mohandle->mo.mo_header = lookup("", db);
414 if (mohandle->mo.mo_header)
415 v = strstr(mohandle->mo.mo_header, "charset=");
416 else
417 v = NULL;
418 if (v) {
419 mohandle->mo.mo_charset = strdup(v + 8);
420 if (!mohandle->mo.mo_charset)
421 goto fail;
422 v = strchr(mohandle->mo.mo_charset, '\n');
423 if (v)
424 *v = '\0';
425 }
426
427 /*
428 * XXX check charset, reject it if we are unable to support the charset
429 * with the current locale.
430 * for example, if we are using euc-jp locale and we are looking at
431 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
432 * the *.mo file as we cannot support it.
433 */
434
435 return 0;
436
437 fail:
438 return -1;
439 }
440
441 static int
442 unmapit(db)
443 struct domainbinding *db;
444 {
445 struct mohandle *mohandle = &db->mohandle;
446
447 /* unmap if there's already mapped region */
448 if (mohandle->addr && mohandle->addr != MAP_FAILED)
449 munmap(mohandle->addr, mohandle->len);
450 mohandle->addr = NULL;
451 if (mohandle->mo.mo_otable)
452 free(mohandle->mo.mo_otable);
453 if (mohandle->mo.mo_ttable)
454 free(mohandle->mo.mo_ttable);
455 if (mohandle->mo.mo_charset)
456 free(mohandle->mo.mo_charset);
457 memset(&mohandle->mo, 0, sizeof(mohandle->mo));
458 return 0;
459 }
460
/*
 * lookup_hash() -
 *	placeholder for a lookup through the hash table of the *.mo file.
 *	nothing is implemented yet, so the function ignores its arguments
 *	and always returns NULL ("not found").
 */
/* ARGSUSED */
static const char *
lookup_hash(const char *msgid, struct domainbinding *db)
{

	(void)msgid;
	(void)db;

	/*
	 * XXX should try a hashed lookup here, but to do so, we need to
	 * look inside the GPL'ed *.c and re-implement...
	 */
	return NULL;
}
474
475 static const char *
476 lookup_bsearch(msgid, db)
477 const char *msgid;
478 struct domainbinding *db;
479 {
480 int top, bottom, middle, omiddle;
481 int n;
482 struct mohandle *mohandle = &db->mohandle;
483
484 top = 0;
485 bottom = mohandle->mo.mo_nstring;
486 omiddle = -1;
487 /* CONSTCOND */
488 while (1) {
489 if (top > bottom)
490 break;
491 middle = (top + bottom) / 2;
492 /* avoid possible infinite loop, when the data is not sorted */
493 if (omiddle == middle)
494 break;
495 if (middle < 0 || middle >= mohandle->mo.mo_nstring)
496 break;
497
498 n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
499 if (n == 0)
500 return (const char *)mohandle->mo.mo_ttable[middle].off;
501 else if (n < 0)
502 bottom = middle;
503 else
504 top = middle;
505 omiddle = middle;
506 }
507
508 return NULL;
509 }
510
/*
 * lookup() -
 *	find the translation of msgid in the catalog held by db.
 *	the hashed lookup is consulted first; whenever it comes back empty
 *	the binary search decides.  returns NULL if msgid is not found.
 */
static const char *
lookup(const char *msgid, struct domainbinding *db)
{
	const char *hit = lookup_hash(msgid, db);

	return (hit != NULL) ? hit : lookup_bsearch(msgid, db);
}
524
/*
 * get_lang_env() -
 *	determine the list of locales to be searched, from the environment.
 *	category_name is the name of the LC_xxx variable to be consulted
 *	(like "LC_MESSAGES").
 *
 *	returns the value of LANGUAGE as is, if it is set.  otherwise the
 *	first one of LC_ALL, LC_xxx and LANG that is set is expanded by
 *	split_locale() and returned.  returns NULL if none of them is set.
 *
 *	a variable with an empty value counts as not set, so that e.g.
 *	LC_ALL="" does not hide a usable LANG.
 */
static const char *
get_lang_env(const char *category_name)
{
	const char *lang;

	/* 1. see LANGUAGE variable first. */
	lang = getenv("LANGUAGE");
	if (lang != NULL && lang[0] != '\0')
		return lang;

	/* 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. */
	lang = getenv("LC_ALL");
	if (lang == NULL || lang[0] == '\0')
		lang = getenv(category_name);
	if (lang == NULL || lang[0] == '\0')
		lang = getenv("LANG");

	if (lang == NULL || lang[0] == '\0')
		return NULL;	/* error */

	return split_locale(lang);
}
547
548 char *
549 dcngettext(domainname, msgid1, msgid2, n, category)
550 const char *domainname;
551 const char *msgid1;
552 const char *msgid2;
553 unsigned long int n;
554 int category;
555 {
556 const char *msgid;
557 char path[PATH_MAX];
558 const char *lpath;
559 static char olpath[PATH_MAX];
560 const char *cname = NULL;
561 const char *v;
562 static char *ocname = NULL;
563 static char *odomainname = NULL;
564 struct domainbinding *db;
565
566 msgid = (n == 1) ? msgid1 : msgid2;
567 if (msgid == NULL)
568 return NULL;
569
570 if (!domainname)
571 domainname = __current_domainname;
572 cname = lookup_category(category);
573 if (!domainname || !cname)
574 goto fail;
575
576 lpath = get_lang_env(cname);
577 if (!lpath)
578 goto fail;
579
580 for (db = __bindings; db; db = db->next)
581 if (strcmp(db->domainname, domainname) == 0)
582 break;
583 if (!db) {
584 if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
585 goto fail;
586 db = __bindings;
587 }
588
589 /* resolve relative path */
590 /* XXX not necessary? */
591 if (db->path[0] != '/') {
592 char buf[PATH_MAX];
593
594 if (getcwd(buf, sizeof(buf)) == 0)
595 goto fail;
596 if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
597 goto fail;
598 if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf))
599 goto fail;
600 strlcpy(db->path, buf, sizeof(db->path));
601 }
602
603 /* don't bother looking it up if the values are the same */
604 if (odomainname && strcmp(domainname, odomainname) == 0 &&
605 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
606 db->mohandle.mo.mo_magic)
607 goto found;
608
609 /* try to find appropriate file, from $LANGUAGE */
610 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
611 domainname, db) == NULL)
612 goto fail;
613
614 if (odomainname)
615 free(odomainname);
616 if (ocname)
617 free(ocname);
618 odomainname = strdup(domainname);
619 ocname = strdup(cname);
620 if (!odomainname || !ocname) {
621 if (odomainname)
622 free(odomainname);
623 if (ocname)
624 free(ocname);
625 odomainname = ocname = NULL;
626 }
627 else
628 strlcpy(olpath, lpath, sizeof(olpath));
629
630 found:
631 v = lookup(msgid, db);
632 if (v) {
633 /*
634 * convert the translated message's encoding.
635 *
636 * special case:
637 * a result of gettext("") shouldn't need any conversion.
638 */
639 if (msgid[0])
640 v = __gettext_iconv(v, db);
641
642 /*
643 * Given the amount of printf-format security issues, it may
644 * be a good idea to validate if the original msgid and the
645 * translated message format string carry the same printf-like
646 * format identifiers.
647 */
648
649 msgid = v;
650 }
651
652 fail:
653 /* LINTED const cast */
654 return (char *)msgid;
655 }
656