gettext.c revision 1.8 1 /* $NetBSD: gettext.c,v 1.8 2001/02/15 10:48:31 minoura Exp $ */
2
3 /*-
4 * Copyright (c) 2000 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: gettext.c,v 1.8 2001/02/15 10:48:31 minoura Exp $");
32 #endif /* LIBC_SCCS and not lint */
33
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <sys/uio.h>
39
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #if 0
46 #include <util.h>
47 #endif
48 #include <libintl.h>
49 #include <locale.h>
50 #include "libintl_local.h"
51 #include "pathnames.h"
52
53 static struct mohandle mohandle;
54
55 static const char *lookup_category __P((int));
56 static const char *split_locale __P((const char *));
57 static const char *lookup_mofile __P((char *, size_t, const char *,
58 char *, const char *, const char *));
59 static u_int32_t flip __P((u_int32_t, u_int32_t));
60 static int validate __P((void *));
61 static int mapit __P((const char *));
62 static int unmapit __P((void));
63 static const char *lookup_hash __P((const char *));
64 static const char *lookup_bsearch __P((const char *));
65 static const char *lookup __P((const char *));
66
67 /*
68 * shortcut functions. the main implementation resides in dcngettext().
69 */
/*
 * gettext() -
 *	translate msgid in the default text domain, category LC_MESSAGES.
 *	forwards to dcngettext() with no plural form and n fixed to 1.
 */
char *
gettext(const char *msgid)
{
	const char *const domainname = NULL;	/* NULL selects the default */
	const char *const plural = NULL;

	return dcngettext(domainname, msgid, plural, 1UL, LC_MESSAGES);
}
77
/*
 * dgettext() -
 *	translate msgid in the given text domain, category LC_MESSAGES.
 *	forwards to dcngettext() with no plural form and n fixed to 1.
 */
char *
dgettext(const char *domainname, const char *msgid)
{
	const char *const plural = NULL;

	return dcngettext(domainname, msgid, plural, 1UL, LC_MESSAGES);
}
86
/*
 * dcgettext() -
 *	translate msgid in the given text domain and locale category.
 *	forwards to dcngettext() with no plural form and n fixed to 1.
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{
	const char *const plural = NULL;

	return dcngettext(domainname, msgid, plural, 1UL, category);
}
96
/*
 * ngettext() -
 *	plural-aware lookup in the default text domain, category
 *	LC_MESSAGES.  msgid1/msgid2/n are handed to dcngettext() untouched.
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{
	const char *const domainname = NULL;	/* NULL selects the default */

	return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
}
106
/*
 * dngettext() -
 *	plural-aware lookup in the given text domain, category LC_MESSAGES.
 *	all other arguments are handed to dcngettext() untouched.
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{
	const int category = LC_MESSAGES;

	return dcngettext(domainname, msgid1, msgid2, n, category);
}
117
/*
 * dcngettext() -
 * look up an internationalized message in the database
 * locale/category/domainname (like ja_JP.eucJP/LC_MESSAGES/domainname).
 * if n equals 1, the internationalized message is looked up for msgid1.
 * otherwise, the message is looked up for msgid2.
 * if the lookup fails, the function returns msgid1 or msgid2 as is.
 *
 * Even though the return type is "char *", the caller should not rewrite the
 * region pointed to by the return value (it should be "const char *", but we
 * can't change it, for compatibility with other implementations).
 *
 * by default (if domainname == NULL), domainname is taken from the value set
 * by textdomain(). usually the name of the application (like "ls") is used
 * as domainname. category is usually LC_MESSAGES.
 *
 * the code reads in *.mo files generated by GNU gettext. a *.mo file is
 * encoded in the byte order of the host that generated it. both byte orders
 * are supported here, as the files are in /usr/share/locale! (or we should
 * move those files into /usr/libdata)
 */
138
/*
 * lookup_category() -
 *	map a locale category constant to its name, which is used as a
 *	directory component when building the catalog pathname.
 *	returns NULL for any category not listed (LC_ALL included).
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int		id;
		const char	*name;
	} names[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		if (names[i].id == category)
			return names[i].name;
	}
	return NULL;
}
154
/*
 * append_candidate() -
 *	append one locale name of the form l[_t][.c][@m] to the
 *	colon-separated list in dst (a NUL-terminated buffer of dstsize
 *	bytes).  t, c and m may be NULL, in which case that part and its
 *	separator are left out.  a candidate that does not fit completely is
 *	dropped, so the list never ends in a truncated locale name.
 */
static void
append_candidate(char *dst, size_t dstsize, const char *l, const char *t,
    const char *c, const char *m)
{
	size_t used;
	int n;

	used = strlen(dst);
	n = snprintf(dst + used, dstsize - used, "%s%s%s%s%s%s%s%s",
	    used ? ":" : "", l,
	    t ? "_" : "", t ? t : "",
	    c ? "." : "", c ? c : "",
	    m ? "@" : "", m ? m : "");
	if (n < 0 || (size_t)n >= dstsize - used)
		dst[used] = '\0';	/* did not fit: undo the partial write */
}

/*
 * split_locale() -
 *	expand a locale name into the colon-separated list of names to be
 *	tried, most specific first.
 *
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * for "l_t.c@m" the list is
 *	l_t.c@m:l_t@m:l@m:l_t.c:l_t:l
 * with the entries that need a missing part left out.
 *
 * if lname is too long for the work buffer, has an empty language part, or
 * has a codeset without a territory, lname itself is returned unchanged.
 * otherwise the result lives in a static buffer which is overwritten by
 * the next call.
 */
static const char *
split_locale(const char *lname)
{
	char buf[BUFSIZ];
	char *l, *t, *c, *m;
	size_t len;
	static char result[BUFSIZ];

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		return lname;
	memcpy(buf, lname, len + 1);

	/* peel the optional parts off from the right-hand end */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;

	if (*l == '\0')
		return lname;
	if (c && !t)
		return lname;

	result[0] = '\0';
	if (m) {
		if (t) {
			if (c)
				append_candidate(result, sizeof(result),
				    l, t, c, m);
			append_candidate(result, sizeof(result),
			    l, t, NULL, m);
		}
		append_candidate(result, sizeof(result), l, NULL, NULL, m);
	}
	if (t) {
		if (c)
			append_candidate(result, sizeof(result),
			    l, t, c, NULL);
		/*
		 * language_territory: this entry used to be emitted from a
		 * stale scratch buffer, which repeated the previous entry
		 * instead.
		 */
		append_candidate(result, sizeof(result), l, t, NULL, NULL);
	}
	append_candidate(result, sizeof(result), l, NULL, NULL, NULL);

	return result;
}
219
220 static const char *
221 lookup_mofile(buf, len, dir, lpath, category, domainname)
222 char *buf;
223 size_t len;
224 const char *dir;
225 char *lpath; /* list of locales to be tried */
226 const char *category;
227 const char *domainname;
228 {
229 struct stat st;
230 char *p, *q;
231
232 q = lpath;
233 while (1) {
234 p = strsep(&q, ":");
235 if (!p)
236 break;
237 if (!*p)
238 continue;
239
240 /* don't mess with default locales */
241 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
242 return NULL;
243
244 /* validate pathname */
245 if (strchr(p, '/') || strchr(category, '/'))
246 continue;
247 #if 1 /*?*/
248 if (strchr(domainname, '/'))
249 continue;
250 #endif
251
252 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
253 category, domainname);
254 if (stat(buf, &st) < 0)
255 continue;
256 if ((st.st_mode & S_IFMT) != S_IFREG)
257 continue;
258
259 if (mapit(buf) == 0)
260 return buf;
261 }
262
263 return NULL;
264 }
265
266 static u_int32_t
267 flip(v, magic)
268 u_int32_t v;
269 u_int32_t magic;
270 {
271
272 if (magic == MO_MAGIC)
273 return v;
274 else if (magic == MO_MAGIC_SWAPPED) {
275 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
276 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
277 return v;
278 } else {
279 abort();
280 /*NOTREACHED*/
281 }
282 }
283
284 static int
285 validate(arg)
286 void *arg;
287 {
288 char *p;
289
290 p = (char *)arg;
291 if (p < (char *)mohandle.addr ||
292 p > (char *)mohandle.addr + mohandle.len)
293 return 0;
294 else
295 return 1;
296 }
297
298 int
299 mapit(path)
300 const char *path;
301 {
302 int fd;
303 struct stat st;
304 char *base;
305 u_int32_t magic, revision;
306 struct moentry *otable, *ttable;
307 struct moentry_h *p;
308 struct mo *mo;
309 size_t l;
310 int i;
311 char *v;
312
313 if (mohandle.addr && mohandle.addr != MAP_FAILED &&
314 strcmp(path, mohandle.path) == 0)
315 return 0; /*already opened*/
316
317 unmapit();
318
319 #if 0
320 if (secure_path(path) != 0)
321 goto fail;
322 #endif
323 if (stat(path, &st) < 0)
324 goto fail;
325 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
326 goto fail;
327 fd = open(path, O_RDONLY);
328 if (fd < 0)
329 goto fail;
330 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
331 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
332 close(fd);
333 goto fail;
334 }
335 if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
336 flip(revision, magic) != MO_REVISION) {
337 close(fd);
338 goto fail;
339 }
340 mohandle.addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
341 MAP_FILE | MAP_SHARED, fd, (off_t)0);
342 if (!mohandle.addr || mohandle.addr == MAP_FAILED) {
343 close(fd);
344 goto fail;
345 }
346 close(fd);
347 mohandle.len = (size_t)st.st_size;
348 strlcpy(mohandle.path, path, sizeof(mohandle.path));
349
350 base = mohandle.addr;
351 mo = (struct mo *)mohandle.addr;
352
353 /* flip endian. do not flip magic number! */
354 mohandle.mo.mo_magic = mo->mo_magic;
355 mohandle.mo.mo_revision = flip(mo->mo_revision, magic);
356 mohandle.mo.mo_nstring = flip(mo->mo_nstring, magic);
357
358 /* validate otable/ttable */
359 otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
360 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
361 if (!validate(otable) || !validate(&otable[mohandle.mo.mo_nstring])) {
362 unmapit();
363 goto fail;
364 }
365 if (!validate(ttable) || !validate(&ttable[mohandle.mo.mo_nstring])) {
366 unmapit();
367 goto fail;
368 }
369
370 /* allocate [ot]table, and convert to normal pointer representation. */
371 l = sizeof(struct moentry_h) * mohandle.mo.mo_nstring;
372 mohandle.mo.mo_otable = (struct moentry_h *)malloc(l);
373 if (!mohandle.mo.mo_otable) {
374 unmapit();
375 goto fail;
376 }
377 mohandle.mo.mo_ttable = (struct moentry_h *)malloc(l);
378 if (!mohandle.mo.mo_ttable) {
379 unmapit();
380 goto fail;
381 }
382 p = mohandle.mo.mo_otable;
383 for (i = 0; i < mohandle.mo.mo_nstring; i++) {
384 p[i].len = flip(otable[i].len, magic);
385 p[i].off = base + flip(otable[i].off, magic);
386
387 if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
388 unmapit();
389 goto fail;
390 }
391 }
392 p = mohandle.mo.mo_ttable;
393 for (i = 0; i < mohandle.mo.mo_nstring; i++) {
394 p[i].len = flip(ttable[i].len, magic);
395 p[i].off = base + flip(ttable[i].off, magic);
396
397 if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
398 unmapit();
399 goto fail;
400 }
401 }
402
403 /* grab MIME-header and charset field */
404 mohandle.mo.mo_header = lookup("");
405 if (mohandle.mo.mo_header)
406 v = strstr(mohandle.mo.mo_header, "charset=");
407 else
408 v = NULL;
409 if (v) {
410 mohandle.mo.mo_charset = strdup(v + 8);
411 if (!mohandle.mo.mo_charset)
412 goto fail;
413 v = strchr(mohandle.mo.mo_charset, '\n');
414 if (v)
415 *v = '\0';
416 }
417
418 /*
419 * XXX check charset, reject it if we are unable to support the charset
420 * with the current locale.
421 * for example, if we are using euc-jp locale and we are looking at
422 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
423 * the *.mo file as we cannot support it.
424 */
425
426 return 0;
427
428 fail:
429 return -1;
430 }
431
432 static int
433 unmapit()
434 {
435
436 /* unmap if there's already mapped region */
437 if (mohandle.addr && mohandle.addr != MAP_FAILED)
438 munmap(mohandle.addr, mohandle.len);
439 mohandle.addr = NULL;
440 mohandle.path[0] = '\0';
441 if (mohandle.mo.mo_otable)
442 free(mohandle.mo.mo_otable);
443 if (mohandle.mo.mo_ttable)
444 free(mohandle.mo.mo_ttable);
445 if (mohandle.mo.mo_charset)
446 free(mohandle.mo.mo_charset);
447 memset(&mohandle.mo, 0, sizeof(mohandle.mo));
448 return 0;
449 }
450
/*
 * lookup_hash() -
 *	placeholder for a lookup through the hash table of the *.mo file.
 *	always returns NULL, which makes lookup() fall back to the binary
 *	search.
 *
 * XXX a real hashed lookup would mean looking inside the GPL'ed *.c and
 * re-implementing it.
 */
static const char *
lookup_hash(const char *msgid)
{

	(void)msgid;	/* not used until the hashed lookup exists */
	return NULL;
}
462
463 static const char *
464 lookup_bsearch(msgid)
465 const char *msgid;
466 {
467 int top, bottom, middle, omiddle;
468 int n;
469
470 top = 0;
471 bottom = mohandle.mo.mo_nstring;
472 omiddle = -1;
473 while (1) {
474 if (top > bottom)
475 break;
476 middle = (top + bottom) / 2;
477 /* avoid possible infinite loop, when the data is not sorted */
478 if (omiddle == middle)
479 break;
480 if (middle < 0 || middle >= mohandle.mo.mo_nstring)
481 break;
482
483 n = strcmp(msgid, mohandle.mo.mo_otable[middle].off);
484 if (n == 0)
485 return (const char *)mohandle.mo.mo_ttable[middle].off;
486 else if (n < 0)
487 bottom = middle;
488 else
489 top = middle;
490 omiddle = middle;
491 }
492
493 return NULL;
494 }
495
/*
 * lookup() -
 *	find the translation of msgid in the mapped catalog: the hashed
 *	lookup is tried first, the binary search is the fallback.
 *	returns NULL if neither finds msgid.
 */
static const char *
lookup(const char *msgid)
{
	const char *hit;

	hit = lookup_hash(msgid);
	return hit ? hit : lookup_bsearch(msgid);
}
508
509 char *
510 dcngettext(domainname, msgid1, msgid2, n, category)
511 const char *domainname;
512 const char *msgid1;
513 const char *msgid2;
514 unsigned long int n;
515 int category;
516 {
517 const char *msgid;
518 char path[PATH_MAX];
519 static char lpath[PATH_MAX];
520 static char olpath[PATH_MAX];
521 const char *locale;
522 const char *language;
523 const char *cname = NULL;
524 const char *v;
525 static char *ocname = NULL;
526 static char *odomainname = NULL;
527 struct domainbinding *db;
528
529 msgid = (n == 1) ? msgid1 : msgid2;
530 if (msgid == NULL)
531 return NULL;
532
533 if (!domainname)
534 domainname = __binding.domainname;
535 cname = lookup_category(category);
536 if (!domainname || !cname)
537 goto fail;
538
539 language = getenv("LANGUAGE");
540 locale = setlocale(LC_MESSAGES, NULL); /*XXX*/
541 if (locale)
542 locale = split_locale(locale);
543 if (language && locale) {
544 if (strlen(language) + strlen(locale) + 2 > sizeof(lpath))
545 goto fail;
546 snprintf(lpath, sizeof(lpath), "%s:%s", language, locale);
547 } else if (language) {
548 if (strlen(language) + 1 > sizeof(lpath))
549 goto fail;
550 strlcpy(lpath, language, sizeof(lpath));
551 } else if (locale) {
552 if (strlen(locale) + 1 > sizeof(lpath))
553 goto fail;
554 strlcpy(lpath, locale, sizeof(lpath));
555 } else
556 goto fail;
557
558 for (db = __binding.next; db; db = db->next)
559 if (strcmp(db->domainname, domainname) == 0)
560 break;
561 if (!db)
562 db = &__binding;
563
564 /* don't bother looking it up if the values are the same */
565 if (odomainname && strcmp(domainname, odomainname) == 0 &&
566 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0)
567 goto found;
568
569 /* try to find appropriate file, from $LANGUAGE */
570 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
571 domainname) == NULL)
572 goto fail;
573
574 if (odomainname)
575 free(odomainname);
576 if (ocname)
577 free(ocname);
578 odomainname = strdup(domainname);
579 ocname = strdup(cname);
580 if (!odomainname || !ocname) {
581 if (odomainname)
582 free(odomainname);
583 if (ocname)
584 free(ocname);
585 odomainname = ocname = NULL;
586 goto fail;
587 }
588
589 strlcpy(olpath, lpath, sizeof(olpath));
590
591 found:
592 v = lookup(msgid);
593 if (v) {
594 /*
595 * XXX call iconv() here, if translated text is encoded
596 * differently from currently-selected encoding (locale).
597 * look at Content-type header in *.mo file, in string obtained
598 * by gettext("").
599 */
600
601 /*
602 * Given the amount of printf-format security issues, it may
603 * be a good idea to validate if the original msgid and the
604 * translated message format string carry the same printf-like
605 * format identifiers.
606 */
607
608 msgid = v;
609 }
610
611 fail:
612 /* LINTED const cast */
613 return (char *)msgid;
614 }
615