Home | History | Annotate | Line # | Download | only in libintl
gettext.c revision 1.8
      1 /*	$NetBSD: gettext.c,v 1.8 2001/02/15 10:48:31 minoura Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #if defined(LIBC_SCCS) && !defined(lint)
     31 __RCSID("$NetBSD: gettext.c,v 1.8 2001/02/15 10:48:31 minoura Exp $");
     32 #endif /* LIBC_SCCS and not lint */
     33 
     34 #include <sys/types.h>
     35 #include <sys/param.h>
     36 #include <sys/stat.h>
     37 #include <sys/mman.h>
     38 #include <sys/uio.h>
     39 
     40 #include <fcntl.h>
     41 #include <stdio.h>
     42 #include <stdlib.h>
     43 #include <unistd.h>
     44 #include <string.h>
     45 #if 0
     46 #include <util.h>
     47 #endif
     48 #include <libintl.h>
     49 #include <locale.h>
     50 #include "libintl_local.h"
     51 #include "pathnames.h"
     52 
     53 static struct mohandle mohandle;
     54 
     55 static const char *lookup_category __P((int));
     56 static const char *split_locale __P((const char *));
     57 static const char *lookup_mofile __P((char *, size_t, const char *,
     58 	char *, const char *, const char *));
     59 static u_int32_t flip __P((u_int32_t, u_int32_t));
     60 static int validate __P((void *));
     61 static int mapit __P((const char *));
     62 static int unmapit __P((void));
     63 static const char *lookup_hash __P((const char *));
     64 static const char *lookup_bsearch __P((const char *));
     65 static const char *lookup __P((const char *));
     66 
     67 /*
     68  * shortcut functions.  the main implementation resides in dcngettext().
     69  */
/*
 * gettext() -
 * singular-form lookup of msgid in the default domain (domainname == NULL),
 * category LC_MESSAGES.  all the work is done by dcngettext().
 */
char *
gettext(msgid)
	const char *msgid;
{
	char *translated;

	translated = dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
	return translated;
}
     77 
/*
 * dgettext() -
 * singular-form lookup of msgid in the given domain, category LC_MESSAGES.
 * all the work is done by dcngettext().
 */
char *
dgettext(domainname, msgid)
	const char *domainname;
	const char *msgid;
{
	char *translated;

	translated = dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
	return translated;
}
     86 
/*
 * dcgettext() -
 * singular-form lookup of msgid in the given domain and locale category.
 * all the work is done by dcngettext().
 */
char *
dcgettext(domainname, msgid, category)
	const char *domainname;
	const char *msgid;
	int category;
{
	char *translated;

	translated = dcngettext(domainname, msgid, NULL, 1UL, category);
	return translated;
}
     96 
/*
 * ngettext() -
 * singular/plural lookup (msgid1 when n is 1, msgid2 otherwise) in the
 * default domain (domainname == NULL), category LC_MESSAGES.
 * all the work is done by dcngettext().
 */
char *
ngettext(msgid1, msgid2, n)
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{
	char *translated;

	translated = dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
	return translated;
}
    106 
/*
 * dngettext() -
 * singular/plural lookup (msgid1 when n is 1, msgid2 otherwise) in the
 * given domain, category LC_MESSAGES.
 * all the work is done by dcngettext().
 */
char *
dngettext(domainname, msgid1, msgid2, n)
	const char *domainname;
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{
	char *translated;

	translated = dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
	return translated;
}
    117 
    118 /*
    119  * dcngettext() -
    120  * lookup internationalized message on database locale/category/domainname
    121  * (like ja_JP.eucJP/LC_MESSAGES/domainname).
    122  * if n equals 1, the internationalized message will be looked up for msgid1.
    123  * otherwise, message will be looked up for msgid2.
    124  * if the lookup fails, the function will return msgid1 or msgid2 as is.
    125  *
    126  * Even though the return type is "char *", caller should not rewrite the
    127  * region pointed to by the return value (should be "const char *", but can't
    128  * change it for compatibility with other implementations).
    129  *
    130  * by default (if domainname == NULL), domainname is taken from the value set
    131  * by textdomain().  usually name of the application (like "ls") is used as
    132  * domainname.  category is usually LC_MESSAGES.
    133  *
    134  * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
    135  * endian encoded file.  both endians are supported here, as the files are in
    136  * /usr/share/locale! (or we should move those files into /usr/libdata)
    137  */
    138 
/*
 * lookup_category() -
 * map a locale category constant (LC_xxx) to its name, which is used as a
 * directory component of the message catalog pathname.
 * returns NULL for any category not listed in the table below.
 */
static const char *
lookup_category(category)
	int category;
{
	static const struct {
		int id;
		const char *name;
	} names[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		if (names[i].id == category)
			return names[i].name;
	}
	return NULL;
}
    154 
/*
 * split_locale() -
 * expand a locale name in XPG syntax,
 *	language[_territory[.codeset]][@modifier]
 * into a colon-separated list of candidate names, most specific first:
 *	ll_TT.cs@mod:ll_TT@mod:ll@mod:ll_TT.cs:ll_TT:ll
 * candidates that need a component which is absent from lname are skipped.
 *
 * returns a pointer to a static buffer, which is overwritten by the next
 * call.  lname itself is returned unchanged if it cannot be parsed (empty
 * language part, or codeset without territory), or if lname or the expanded
 * list does not fit into BUFSIZ bytes.
 */
static const char *
split_locale(lname)
	const char *lname;
{
	/* optional components used by each candidate, in output order */
	static const struct {
		int use_t;	/* territory */
		int use_c;	/* codeset */
		int use_m;	/* modifier */
	} forms[] = {
		{ 1, 1, 1 },	/* ll_TT.cs@mod */
		{ 1, 0, 1 },	/* ll_TT@mod */
		{ 0, 0, 1 },	/* ll@mod */
		{ 1, 1, 0 },	/* ll_TT.cs */
		{ 1, 0, 0 },	/* ll_TT */
		{ 0, 0, 0 },	/* ll */
	};
	static char result[BUFSIZ];
	char buf[BUFSIZ];
	char *l, *t, *c, *m;
	size_t len, off, i;
	int n;

	memset(result, 0, sizeof(result));

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		goto fail;
	memcpy(buf, lname, len + 1);

	/* peel the optional components off, starting from the right */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (strlen(l) == 0)
		goto fail;
	/* a codeset is only valid after a territory */
	if (c && !t)
		goto fail;

	off = 0;
	for (i = 0; i < sizeof(forms) / sizeof(forms[0]); i++) {
		if ((forms[i].use_t && !t) || (forms[i].use_c && !c) ||
		    (forms[i].use_m && !m))
			continue;

		/* every candidate but the first is preceded by ':' */
		n = snprintf(result + off, sizeof(result) - off,
		    "%s%s%s%s%s%s%s%s",
		    off ? ":" : "", l,
		    forms[i].use_t ? "_" : "", forms[i].use_t ? t : "",
		    forms[i].use_c ? "." : "", forms[i].use_c ? c : "",
		    forms[i].use_m ? "@" : "", forms[i].use_m ? m : "");
		/* never hand out a truncated (i.e. bogus) candidate */
		if (n < 0 || (size_t)n >= sizeof(result) - off)
			goto fail;
		off += (size_t)n;
	}

	return result;

fail:
	return lname;
}
    219 
    220 static const char *
    221 lookup_mofile(buf, len, dir, lpath, category, domainname)
    222 	char *buf;
    223 	size_t len;
    224 	const char *dir;
    225 	char *lpath;	/* list of locales to be tried */
    226 	const char *category;
    227 	const char *domainname;
    228 {
    229 	struct stat st;
    230 	char *p, *q;
    231 
    232 	q = lpath;
    233 	while (1) {
    234 		p = strsep(&q, ":");
    235 		if (!p)
    236 			break;
    237 		if (!*p)
    238 			continue;
    239 
    240 		/* don't mess with default locales */
    241 		if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
    242 			return NULL;
    243 
    244 		/* validate pathname */
    245 		if (strchr(p, '/') || strchr(category, '/'))
    246 			continue;
    247 #if 1	/*?*/
    248 		if (strchr(domainname, '/'))
    249 			continue;
    250 #endif
    251 
    252 		snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
    253 		    category, domainname);
    254 		if (stat(buf, &st) < 0)
    255 			continue;
    256 		if ((st.st_mode & S_IFMT) != S_IFREG)
    257 			continue;
    258 
    259 		if (mapit(buf) == 0)
    260 			return buf;
    261 	}
    262 
    263 	return NULL;
    264 }
    265 
    266 static u_int32_t
    267 flip(v, magic)
    268 	u_int32_t v;
    269 	u_int32_t magic;
    270 {
    271 
    272 	if (magic == MO_MAGIC)
    273 		return v;
    274 	else if (magic == MO_MAGIC_SWAPPED) {
    275 		v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
    276 		    ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
    277 		return v;
    278 	} else {
    279 		abort();
    280 		/*NOTREACHED*/
    281 	}
    282 }
    283 
    284 static int
    285 validate(arg)
    286 	void *arg;
    287 {
    288 	char *p;
    289 
    290 	p = (char *)arg;
    291 	if (p < (char *)mohandle.addr ||
    292 	    p > (char *)mohandle.addr + mohandle.len)
    293 		return 0;
    294 	else
    295 		return 1;
    296 }
    297 
    298 int
    299 mapit(path)
    300 	const char *path;
    301 {
    302 	int fd;
    303 	struct stat st;
    304 	char *base;
    305 	u_int32_t magic, revision;
    306 	struct moentry *otable, *ttable;
    307 	struct moentry_h *p;
    308 	struct mo *mo;
    309 	size_t l;
    310 	int i;
    311 	char *v;
    312 
    313 	if (mohandle.addr && mohandle.addr != MAP_FAILED &&
    314 	    strcmp(path, mohandle.path) == 0)
    315 		return 0;	/*already opened*/
    316 
    317 	unmapit();
    318 
    319 #if 0
    320 	if (secure_path(path) != 0)
    321 		goto fail;
    322 #endif
    323 	if (stat(path, &st) < 0)
    324 		goto fail;
    325 	if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
    326 		goto fail;
    327 	fd = open(path, O_RDONLY);
    328 	if (fd < 0)
    329 		goto fail;
    330 	if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
    331 	    (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
    332 		close(fd);
    333 		goto fail;
    334 	}
    335 	if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
    336 	    flip(revision, magic) != MO_REVISION) {
    337 		close(fd);
    338 		goto fail;
    339 	}
    340 	mohandle.addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
    341 	    MAP_FILE | MAP_SHARED, fd, (off_t)0);
    342 	if (!mohandle.addr || mohandle.addr == MAP_FAILED) {
    343 		close(fd);
    344 		goto fail;
    345 	}
    346 	close(fd);
    347 	mohandle.len = (size_t)st.st_size;
    348 	strlcpy(mohandle.path, path, sizeof(mohandle.path));
    349 
    350 	base = mohandle.addr;
    351 	mo = (struct mo *)mohandle.addr;
    352 
    353 	/* flip endian.  do not flip magic number! */
    354 	mohandle.mo.mo_magic = mo->mo_magic;
    355 	mohandle.mo.mo_revision = flip(mo->mo_revision, magic);
    356 	mohandle.mo.mo_nstring = flip(mo->mo_nstring, magic);
    357 
    358 	/* validate otable/ttable */
    359 	otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
    360 	ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
    361 	if (!validate(otable) || !validate(&otable[mohandle.mo.mo_nstring])) {
    362 		unmapit();
    363 		goto fail;
    364 	}
    365 	if (!validate(ttable) || !validate(&ttable[mohandle.mo.mo_nstring])) {
    366 		unmapit();
    367 		goto fail;
    368 	}
    369 
    370 	/* allocate [ot]table, and convert to normal pointer representation. */
    371 	l = sizeof(struct moentry_h) * mohandle.mo.mo_nstring;
    372 	mohandle.mo.mo_otable = (struct moentry_h *)malloc(l);
    373 	if (!mohandle.mo.mo_otable) {
    374 		unmapit();
    375 		goto fail;
    376 	}
    377 	mohandle.mo.mo_ttable = (struct moentry_h *)malloc(l);
    378 	if (!mohandle.mo.mo_ttable) {
    379 		unmapit();
    380 		goto fail;
    381 	}
    382 	p = mohandle.mo.mo_otable;
    383 	for (i = 0; i < mohandle.mo.mo_nstring; i++) {
    384 		p[i].len = flip(otable[i].len, magic);
    385 		p[i].off = base + flip(otable[i].off, magic);
    386 
    387 		if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
    388 			unmapit();
    389 			goto fail;
    390 		}
    391 	}
    392 	p = mohandle.mo.mo_ttable;
    393 	for (i = 0; i < mohandle.mo.mo_nstring; i++) {
    394 		p[i].len = flip(ttable[i].len, magic);
    395 		p[i].off = base + flip(ttable[i].off, magic);
    396 
    397 		if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
    398 			unmapit();
    399 			goto fail;
    400 		}
    401 	}
    402 
    403 	/* grab MIME-header and charset field */
    404 	mohandle.mo.mo_header = lookup("");
    405 	if (mohandle.mo.mo_header)
    406 		v = strstr(mohandle.mo.mo_header, "charset=");
    407 	else
    408 		v = NULL;
    409 	if (v) {
    410 		mohandle.mo.mo_charset = strdup(v + 8);
    411 		if (!mohandle.mo.mo_charset)
    412 			goto fail;
    413 		v = strchr(mohandle.mo.mo_charset, '\n');
    414 		if (v)
    415 			*v = '\0';
    416 	}
    417 
    418 	/*
    419 	 * XXX check charset, reject it if we are unable to support the charset
    420 	 * with the current locale.
    421 	 * for example, if we are using euc-jp locale and we are looking at
    422 	 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
    423 	 * the *.mo file as we cannot support it.
    424 	 */
    425 
    426 	return 0;
    427 
    428 fail:
    429 	return -1;
    430 }
    431 
    432 static int
    433 unmapit()
    434 {
    435 
    436 	/* unmap if there's already mapped region */
    437 	if (mohandle.addr && mohandle.addr != MAP_FAILED)
    438 		munmap(mohandle.addr, mohandle.len);
    439 	mohandle.addr = NULL;
    440 	mohandle.path[0] = '\0';
    441 	if (mohandle.mo.mo_otable)
    442 		free(mohandle.mo.mo_otable);
    443 	if (mohandle.mo.mo_ttable)
    444 		free(mohandle.mo.mo_ttable);
    445 	if (mohandle.mo.mo_charset)
    446 		free(mohandle.mo.mo_charset);
    447 	memset(&mohandle.mo, 0, sizeof(mohandle.mo));
    448 	return 0;
    449 }
    450 
/*
 * lookup_hash() -
 * placeholder for a lookup through the hash table of the *.mo file.
 * not implemented: always returns NULL, so that lookup() falls back to
 * the binary search.
 */
static const char *
lookup_hash(msgid)
	const char *msgid;
{
	const char *hit = NULL;

	/*
	 * XXX should try a hashed lookup here, but to do so, we need to
	 * look inside the GPL'ed *.c and re-implement...
	 */
	(void)msgid;
	return hit;
}
    462 
    463 static const char *
    464 lookup_bsearch(msgid)
    465 	const char *msgid;
    466 {
    467 	int top, bottom, middle, omiddle;
    468 	int n;
    469 
    470 	top = 0;
    471 	bottom = mohandle.mo.mo_nstring;
    472 	omiddle = -1;
    473 	while (1) {
    474 		if (top > bottom)
    475 			break;
    476 		middle = (top + bottom) / 2;
    477 		/* avoid possible infinite loop, when the data is not sorted */
    478 		if (omiddle == middle)
    479 			break;
    480 		if (middle < 0 || middle >= mohandle.mo.mo_nstring)
    481 			break;
    482 
    483 		n = strcmp(msgid, mohandle.mo.mo_otable[middle].off);
    484 		if (n == 0)
    485 			return (const char *)mohandle.mo.mo_ttable[middle].off;
    486 		else if (n < 0)
    487 			bottom = middle;
    488 		else
    489 			top = middle;
    490 		omiddle = middle;
    491 	}
    492 
    493 	return NULL;
    494 }
    495 
/*
 * lookup() -
 * find the translation of msgid in the mapped catalog.  the hashed lookup
 * is tried first, then the binary search.  returns NULL if neither finds
 * msgid.
 */
static const char *
lookup(msgid)
	const char *msgid;
{
	const char *hit;

	hit = lookup_hash(msgid);
	return hit ? hit : lookup_bsearch(msgid);
}
    508 
    509 char *
    510 dcngettext(domainname, msgid1, msgid2, n, category)
    511 	const char *domainname;
    512 	const char *msgid1;
    513 	const char *msgid2;
    514 	unsigned long int n;
    515 	int category;
    516 {
    517 	const char *msgid;
    518 	char path[PATH_MAX];
    519 	static char lpath[PATH_MAX];
    520 	static char olpath[PATH_MAX];
    521 	const char *locale;
    522 	const char *language;
    523 	const char *cname = NULL;
    524 	const char *v;
    525 	static char *ocname = NULL;
    526 	static char *odomainname = NULL;
    527 	struct domainbinding *db;
    528 
    529 	msgid = (n == 1) ? msgid1 : msgid2;
    530 	if (msgid == NULL)
    531 		return NULL;
    532 
    533 	if (!domainname)
    534 		domainname = __binding.domainname;
    535 	cname = lookup_category(category);
    536 	if (!domainname || !cname)
    537 		goto fail;
    538 
    539 	language = getenv("LANGUAGE");
    540 	locale = setlocale(LC_MESSAGES, NULL);	/*XXX*/
    541 	if (locale)
    542 		locale = split_locale(locale);
    543 	if (language && locale) {
    544 		if (strlen(language) + strlen(locale) + 2 > sizeof(lpath))
    545 			goto fail;
    546 		snprintf(lpath, sizeof(lpath), "%s:%s", language, locale);
    547 	} else if (language) {
    548 		if (strlen(language) + 1 > sizeof(lpath))
    549 			goto fail;
    550 		strlcpy(lpath, language, sizeof(lpath));
    551 	} else if (locale) {
    552 		if (strlen(locale) + 1 > sizeof(lpath))
    553 			goto fail;
    554 		strlcpy(lpath, locale, sizeof(lpath));
    555 	} else
    556 		goto fail;
    557 
    558 	for (db = __binding.next; db; db = db->next)
    559 		if (strcmp(db->domainname, domainname) == 0)
    560 			break;
    561 	if (!db)
    562 		db = &__binding;
    563 
    564 	/* don't bother looking it up if the values are the same */
    565 	if (odomainname && strcmp(domainname, odomainname) == 0 &&
    566 	    ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0)
    567 		goto found;
    568 
    569 	/* try to find appropriate file, from $LANGUAGE */
    570 	if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
    571 	    domainname) == NULL)
    572 		goto fail;
    573 
    574 	if (odomainname)
    575 		free(odomainname);
    576 	if (ocname)
    577 		free(ocname);
    578 	odomainname = strdup(domainname);
    579 	ocname = strdup(cname);
    580 	if (!odomainname || !ocname) {
    581 		if (odomainname)
    582 			free(odomainname);
    583 		if (ocname)
    584 			free(ocname);
    585 		odomainname = ocname = NULL;
    586 		goto fail;
    587 	}
    588 
    589 	strlcpy(olpath, lpath, sizeof(olpath));
    590 
    591 found:
    592 	v = lookup(msgid);
    593 	if (v) {
    594 		/*
    595 		 * XXX call iconv() here, if translated text is encoded
    596 		 * differently from currently-selected encoding (locale).
    597 		 * look at Content-type header in *.mo file, in string obtained
    598 		 * by gettext("").
    599 		 */
    600 
    601 		/*
    602 		 * Given the amount of printf-format security issues, it may
    603 		 * be a good idea to validate if the original msgid and the
    604 		 * translated message format string carry the same printf-like
    605 		 * format identifiers.
    606 		 */
    607 
    608 		msgid = v;
    609 	}
    610 
    611 fail:
    612 	/* LINTED const cast */
    613 	return (char *)msgid;
    614 }
    615