gettext.c revision 1.11 1 /* $NetBSD: gettext.c,v 1.11 2001/12/09 11:11:01 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 2000, 2001 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $
29 */
30
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: gettext.c,v 1.11 2001/12/09 11:11:01 yamt Exp $");
34 #endif /* LIBC_SCCS and not lint */
35
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/stat.h>
39 #include <sys/mman.h>
40 #include <sys/uio.h>
41
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <unistd.h>
46 #include <string.h>
47 #if 0
48 #include <util.h>
49 #endif
50 #include <libintl.h>
51 #include <locale.h>
52 #include "libintl_local.h"
53 #include "pathnames.h"
54
/*
 * forward declarations of the file-local helpers.
 */
static const char *lookup_category(int category);
static const char *split_locale(const char *lname);
static const char *lookup_mofile(char *buf, size_t len, const char *dir,
    const char *lpath, const char *category, const char *domainname,
    struct domainbinding *db);
static u_int32_t flip(u_int32_t v, u_int32_t magic);
static int validate(void *arg, struct mohandle *mohandle);
static int mapit(const char *path, struct domainbinding *db);
static int unmapit(struct domainbinding *db);
static const char *lookup_hash(const char *msgid, struct domainbinding *db);
static const char *lookup_bsearch(const char *msgid, struct domainbinding *db);
static const char *lookup(const char *msgid, struct domainbinding *db);
static const char *get_lang_env(const char *category_name);
67
68 /*
69 * shortcut functions. the main implementation resides in dcngettext().
70 */
/*
 * gettext() -
 *	translate msgid using the default domain (domainname == NULL) and
 *	the LC_MESSAGES category.  no plural handling: n is fixed to 1.
 */
char *
gettext(const char *msgid)
{

	return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
}
78
/*
 * dgettext() -
 *	translate msgid using the given domain and the LC_MESSAGES category.
 *	no plural handling: n is fixed to 1.
 */
char *
dgettext(const char *domainname, const char *msgid)
{

	return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
}
87
/*
 * dcgettext() -
 *	translate msgid using the given domain and locale category.
 *	no plural handling: n is fixed to 1.
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{

	return dcngettext(domainname, msgid, NULL, 1UL, category);
}
97
/*
 * ngettext() -
 *	plural-aware lookup in the default domain (domainname == NULL),
 *	LC_MESSAGES category.  msgid1 is used when n == 1, msgid2 otherwise.
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{

	return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
}
107
/*
 * dngettext() -
 *	plural-aware lookup in the given domain, LC_MESSAGES category.
 *	msgid1 is used when n == 1, msgid2 otherwise.
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{

	return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
}
118
119 /*
120 * dcngettext() -
121 * lookup internationalized message on database locale/category/domainname
122 * (like ja_JP.eucJP/LC_MESSAGES/domainname).
 * if n equals 1, the internationalized message will be looked up for msgid1.
124 * otherwise, message will be looked up for msgid2.
125 * if the lookup fails, the function will return msgid1 or msgid2 as is.
126 *
127 * Even though the return type is "char *", caller should not rewrite the
128 * region pointed to by the return value (should be "const char *", but can't
129 * change it for compatibility with other implementations).
130 *
131 * by default (if domainname == NULL), domainname is taken from the value set
132 * by textdomain(). usually name of the application (like "ls") is used as
133 * domainname. category is usually LC_MESSAGES.
134 *
135 * the code reads in *.mo files generated by GNU gettext. *.mo is a host-
136 * endian encoded file. both endians are supported here, as the files are in
137 * /usr/share/locale! (or we should move those files into /usr/libdata)
138 */
139
/*
 * lookup_category() -
 *	map an LC_xxx category constant to its name as a string
 *	(LC_MESSAGES -> "LC_MESSAGES").  returns NULL for any value that is
 *	not one of the six categories listed in the table below.
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int		id;
		const char	*name;
	} categories[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(categories) / sizeof(categories[0]); i++) {
		if (categories[i].id == category)
			return categories[i].name;
	}
	return NULL;
}
155
/*
 * split_locale_add() -
 *	append "name" to the NUL-terminated list held in dst (capacity
 *	dstlen bytes), followed by a ':' separator when sep is non-zero.
 *	an entry that does not fit completely is dropped, so the list never
 *	ends in a truncated locale name.
 */
static void
split_locale_add(char *dst, size_t dstlen, const char *name, int sep)
{
	size_t used = strlen(dst);
	size_t namelen = strlen(name);
	size_t need = namelen + (sep ? 1 : 0);

	/* need one more byte than "need" for the terminating NUL */
	if (need >= dstlen - used)
		return;

	memcpy(dst + used, name, namelen);
	used += namelen;
	if (sep)
		dst[used++] = ':';
	dst[used] = '\0';
}

/*
 * split_locale() -
 *	expand a locale name into a colon-separated list of candidates,
 *	from the most specific to the least specific.
 *
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * for "ll_TT.cs@mod" the result is
 *	ll_TT.cs@mod:ll_TT@mod:ll@mod:ll_TT.cs:ll_TT:ll
 * with the entries that need a missing component left out.
 *
 * lname is returned unchanged when it cannot be parsed: it is too long
 * for the work buffer, the language part is empty, or a codeset is given
 * without a territory.
 *
 * otherwise the result lives in a static buffer which is overwritten by
 * the next call.
 */
static const char *
split_locale(const char *lname)
{
	static char result[BUFSIZ];
	char buf[BUFSIZ], tmp[BUFSIZ];
	char *l, *t, *c, *m;
	size_t len;

	memset(result, 0, sizeof(result));

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		return lname;
	memcpy(buf, lname, len + 1);

	/* peel the optional components off, right to left */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;

	if (*l == '\0')
		return lname;
	if (c && !t)
		return lname;

	/*
	 * every candidate is no longer than lname itself, so formatting
	 * into tmp can never truncate.
	 */
	if (m) {
		if (t) {
			if (c) {
				snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s",
				    l, t, c, m);
				split_locale_add(result, sizeof(result),
				    tmp, 1);
			}
			snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m);
			split_locale_add(result, sizeof(result), tmp, 1);
		}
		snprintf(tmp, sizeof(tmp), "%s@%s", l, m);
		split_locale_add(result, sizeof(result), tmp, 1);
	}
	if (t) {
		if (c) {
			snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c);
			split_locale_add(result, sizeof(result), tmp, 1);
		}
		/* language_territory; tmp must be rebuilt for this entry */
		snprintf(tmp, sizeof(tmp), "%s_%s", l, t);
		split_locale_add(result, sizeof(result), tmp, 1);
	}
	split_locale_add(result, sizeof(result), l, 0);

	return result;
}
220
221 static const char *
222 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
223 char *buf;
224 size_t len;
225 const char *dir;
226 const char *lpath; /* list of locales to be tried */
227 const char *category;
228 const char *domainname;
229 struct domainbinding *db;
230 {
231 struct stat st;
232 char *p, *q;
233 char lpath_tmp[BUFSIZ];
234
235 strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp));
236 q = lpath_tmp;
237 /* CONSTCOND */
238 while (1) {
239 p = strsep(&q, ":");
240 if (!p)
241 break;
242 if (!*p)
243 continue;
244
245 /* don't mess with default locales */
246 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
247 return NULL;
248
249 /* validate pathname */
250 if (strchr(p, '/') || strchr(category, '/'))
251 continue;
252 #if 1 /*?*/
253 if (strchr(domainname, '/'))
254 continue;
255 #endif
256
257 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
258 category, domainname);
259 if (stat(buf, &st) < 0)
260 continue;
261 if ((st.st_mode & S_IFMT) != S_IFREG)
262 continue;
263
264 if (mapit(buf, db) == 0)
265 return buf;
266 }
267
268 return NULL;
269 }
270
271 static u_int32_t
272 flip(v, magic)
273 u_int32_t v;
274 u_int32_t magic;
275 {
276
277 if (magic == MO_MAGIC)
278 return v;
279 else if (magic == MO_MAGIC_SWAPPED) {
280 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
281 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
282 return v;
283 } else {
284 abort();
285 /*NOTREACHED*/
286 }
287 }
288
289 static int
290 validate(arg, mohandle)
291 void *arg;
292 struct mohandle *mohandle;
293 {
294 char *p;
295
296 p = (char *)arg;
297 if (p < (char *)mohandle->addr ||
298 p > (char *)mohandle->addr + mohandle->len)
299 return 0;
300 else
301 return 1;
302 }
303
304 int
305 mapit(path, db)
306 const char *path;
307 struct domainbinding *db;
308 {
309 int fd;
310 struct stat st;
311 char *base;
312 u_int32_t magic, revision;
313 struct moentry *otable, *ttable;
314 struct moentry_h *p;
315 struct mo *mo;
316 size_t l;
317 int i;
318 char *v;
319 struct mohandle *mohandle = &db->mohandle;
320
321 if (mohandle->addr && mohandle->addr != MAP_FAILED &&
322 mohandle->mo.mo_magic)
323 return 0; /*already opened*/
324
325 unmapit(db);
326
327 #if 0
328 if (secure_path(path) != 0)
329 goto fail;
330 #endif
331 if (stat(path, &st) < 0)
332 goto fail;
333 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
334 goto fail;
335 fd = open(path, O_RDONLY);
336 if (fd < 0)
337 goto fail;
338 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
339 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
340 close(fd);
341 goto fail;
342 }
343 if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
344 flip(revision, magic) != MO_REVISION) {
345 close(fd);
346 goto fail;
347 }
348 mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
349 MAP_FILE | MAP_SHARED, fd, (off_t)0);
350 if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
351 close(fd);
352 goto fail;
353 }
354 close(fd);
355 mohandle->len = (size_t)st.st_size;
356
357 base = mohandle->addr;
358 mo = (struct mo *)mohandle->addr;
359
360 /* flip endian. do not flip magic number! */
361 mohandle->mo.mo_magic = mo->mo_magic;
362 mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
363 mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
364
365 /* validate otable/ttable */
366 otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
367 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
368 if (!validate(otable, mohandle) ||
369 !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
370 unmapit(db);
371 goto fail;
372 }
373 if (!validate(ttable, mohandle) ||
374 !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
375 unmapit(db);
376 goto fail;
377 }
378
379 /* allocate [ot]table, and convert to normal pointer representation. */
380 l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
381 mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
382 if (!mohandle->mo.mo_otable) {
383 unmapit(db);
384 goto fail;
385 }
386 mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
387 if (!mohandle->mo.mo_ttable) {
388 unmapit(db);
389 goto fail;
390 }
391 p = mohandle->mo.mo_otable;
392 for (i = 0; i < mohandle->mo.mo_nstring; i++) {
393 p[i].len = flip(otable[i].len, magic);
394 p[i].off = base + flip(otable[i].off, magic);
395
396 if (!validate(p[i].off, mohandle) ||
397 !validate(p[i].off + p[i].len + 1, mohandle)) {
398 unmapit(db);
399 goto fail;
400 }
401 }
402 p = mohandle->mo.mo_ttable;
403 for (i = 0; i < mohandle->mo.mo_nstring; i++) {
404 p[i].len = flip(ttable[i].len, magic);
405 p[i].off = base + flip(ttable[i].off, magic);
406
407 if (!validate(p[i].off, mohandle) ||
408 !validate(p[i].off + p[i].len + 1, mohandle)) {
409 unmapit(db);
410 goto fail;
411 }
412 }
413
414 /* grab MIME-header and charset field */
415 mohandle->mo.mo_header = lookup("", db);
416 if (mohandle->mo.mo_header)
417 v = strstr(mohandle->mo.mo_header, "charset=");
418 else
419 v = NULL;
420 if (v) {
421 mohandle->mo.mo_charset = strdup(v + 8);
422 if (!mohandle->mo.mo_charset)
423 goto fail;
424 v = strchr(mohandle->mo.mo_charset, '\n');
425 if (v)
426 *v = '\0';
427 }
428
429 /*
430 * XXX check charset, reject it if we are unable to support the charset
431 * with the current locale.
432 * for example, if we are using euc-jp locale and we are looking at
433 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
434 * the *.mo file as we cannot support it.
435 */
436
437 return 0;
438
439 fail:
440 return -1;
441 }
442
443 static int
444 unmapit(db)
445 struct domainbinding *db;
446 {
447 struct mohandle *mohandle = &db->mohandle;
448
449 /* unmap if there's already mapped region */
450 if (mohandle->addr && mohandle->addr != MAP_FAILED)
451 munmap(mohandle->addr, mohandle->len);
452 mohandle->addr = NULL;
453 if (mohandle->mo.mo_otable)
454 free(mohandle->mo.mo_otable);
455 if (mohandle->mo.mo_ttable)
456 free(mohandle->mo.mo_ttable);
457 if (mohandle->mo.mo_charset)
458 free(mohandle->mo.mo_charset);
459 memset(&mohandle->mo, 0, sizeof(mohandle->mo));
460 return 0;
461 }
462
463 /* ARGSUSED */
/*
 * lookup_hash() -
 *	placeholder for a lookup via the hash table of the *.mo file.
 *	not implemented: always returns NULL and uses neither argument.
 */
static const char *
lookup_hash(const char *msgid, struct domainbinding *db)
{

	/*
	 * XXX should try a hashed lookup here, but to do so, we need to
	 * look inside the GPL'ed *.c and re-implement...
	 */
	(void)msgid;
	(void)db;
	return NULL;
}
476
477 static const char *
478 lookup_bsearch(msgid, db)
479 const char *msgid;
480 struct domainbinding *db;
481 {
482 int top, bottom, middle, omiddle;
483 int n;
484 struct mohandle *mohandle = &db->mohandle;
485
486 top = 0;
487 bottom = mohandle->mo.mo_nstring;
488 omiddle = -1;
489 /* CONSTCOND */
490 while (1) {
491 if (top > bottom)
492 break;
493 middle = (top + bottom) / 2;
494 /* avoid possible infinite loop, when the data is not sorted */
495 if (omiddle == middle)
496 break;
497 if (middle < 0 || middle >= mohandle->mo.mo_nstring)
498 break;
499
500 n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
501 if (n == 0)
502 return (const char *)mohandle->mo.mo_ttable[middle].off;
503 else if (n < 0)
504 bottom = middle;
505 else
506 top = middle;
507 omiddle = middle;
508 }
509
510 return NULL;
511 }
512
/*
 * lookup() -
 *	find the translation of msgid in db.  the hashed lookup is tried
 *	first, the binary search is used if it finds nothing.
 *	returns NULL if there is no translation.
 */
static const char *
lookup(const char *msgid, struct domainbinding *db)
{
	const char *hit = lookup_hash(msgid, db);

	return hit ? hit : lookup_bsearch(msgid, db);
}
526
/*
 * get_lang_env() -
 *	work out the list of locales to be tried for the category named
 *	category_name (like "LC_MESSAGES") from the environment.
 *
 *	the first variable which is set to a non-empty value wins:
 *	LANGUAGE, LC_ALL, LC_xxx (category_name), LANG.
 *	LC_ALL has to be checked before LC_xxx, as it overrides the
 *	per-category variables.
 *
 * the value of LANGUAGE is returned as is (it already is a colon-separated
 * list).  the value of any other variable is expanded by split_locale().
 * returns NULL if none of the variables is usable.
 */
static const char *
get_lang_env(const char *category_name)
{
	const char *lang;

	/* 1. see LANGUAGE variable first. */
	lang = getenv("LANGUAGE");
	if (lang && *lang)
		return lang;

	/* 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. */
	lang = getenv("LC_ALL");
	if (!lang || !*lang)
		lang = getenv(category_name);
	if (!lang || !*lang)
		lang = getenv("LANG");

	if (!lang || !*lang)
		return NULL;	/* error */

	return split_locale(lang);
}
548
549 char *
550 dcngettext(domainname, msgid1, msgid2, n, category)
551 const char *domainname;
552 const char *msgid1;
553 const char *msgid2;
554 unsigned long int n;
555 int category;
556 {
557 const char *msgid;
558 char path[PATH_MAX];
559 const char *lpath;
560 static char olpath[PATH_MAX];
561 const char *cname = NULL;
562 const char *v;
563 static char *ocname = NULL;
564 static char *odomainname = NULL;
565 struct domainbinding *db;
566
567 msgid = (n == 1) ? msgid1 : msgid2;
568 if (msgid == NULL)
569 return NULL;
570
571 if (!domainname)
572 domainname = __current_domainname;
573 cname = lookup_category(category);
574 if (!domainname || !cname)
575 goto fail;
576
577 lpath = get_lang_env(cname);
578 if (!lpath)
579 goto fail;
580
581 for (db = __bindings; db; db = db->next)
582 if (strcmp(db->domainname, domainname) == 0)
583 break;
584 if (!db) {
585 if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
586 goto fail;
587 db = __bindings;
588 }
589
590 /* resolve relative path */
591 /* XXX not necessary? */
592 if (db->path[0] != '/') {
593 char buf[PATH_MAX];
594
595 if (getcwd(buf, sizeof(buf)) == 0)
596 goto fail;
597 if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
598 goto fail;
599 if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf))
600 goto fail;
601 strcpy(db->path, buf);
602 }
603
604 /* don't bother looking it up if the values are the same */
605 if (odomainname && strcmp(domainname, odomainname) == 0 &&
606 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
607 db->mohandle.mo.mo_magic)
608 goto found;
609
610 /* try to find appropriate file, from $LANGUAGE */
611 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
612 domainname, db) == NULL)
613 goto fail;
614
615 if (odomainname)
616 free(odomainname);
617 if (ocname)
618 free(ocname);
619 odomainname = strdup(domainname);
620 ocname = strdup(cname);
621 if (!odomainname || !ocname) {
622 if (odomainname)
623 free(odomainname);
624 if (ocname)
625 free(ocname);
626 odomainname = ocname = NULL;
627 }
628 else
629 strlcpy(olpath, lpath, sizeof(olpath));
630
631 found:
632 v = lookup(msgid, db);
633 if (v) {
634 /*
635 * XXX call iconv() here, if translated text is encoded
636 * differently from currently-selected encoding (locale).
637 * look at Content-type header in *.mo file, in string obtained
638 * by gettext("").
639 */
640
641 /*
642 * Given the amount of printf-format security issues, it may
643 * be a good idea to validate if the original msgid and the
644 * translated message format string carry the same printf-like
645 * format identifiers.
646 */
647
648 msgid = v;
649 }
650
651 fail:
652 /* LINTED const cast */
653 return (char *)msgid;
654 }
655