Home | History | Annotate | Line # | Download | only in libintl
gettext.c revision 1.12
      1 /*	$NetBSD: gettext.c,v 1.12 2001/12/29 05:54:36 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000, 2001 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  *
     28  * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $
     29  */
     30 
     31 #include <sys/cdefs.h>
     32 #if defined(LIBC_SCCS) && !defined(lint)
     33 __RCSID("$NetBSD: gettext.c,v 1.12 2001/12/29 05:54:36 yamt Exp $");
     34 #endif /* LIBC_SCCS and not lint */
     35 
     36 #include <sys/types.h>
     37 #include <sys/param.h>
     38 #include <sys/stat.h>
     39 #include <sys/mman.h>
     40 #include <sys/uio.h>
     41 
     42 #include <fcntl.h>
     43 #include <stdio.h>
     44 #include <stdlib.h>
     45 #include <unistd.h>
     46 #include <string.h>
     47 #if 0
     48 #include <util.h>
     49 #endif
     50 #include <libintl.h>
     51 #include <locale.h>
     52 #include "libintl_local.h"
     53 #include "pathnames.h"
     54 
     55 static const char *lookup_category __P((int));
     56 static const char *split_locale __P((const char *));
     57 static const char *lookup_mofile __P((char *, size_t, const char *,
     58 	const char *, const char *, const char *, struct domainbinding *));
     59 static u_int32_t flip __P((u_int32_t, u_int32_t));
     60 static int validate __P((void *, struct mohandle *));
     61 static int mapit __P((const char *, struct domainbinding *));
     62 static int unmapit __P((struct domainbinding *));
     63 static const char *lookup_hash __P((const char *, struct domainbinding *));
     64 static const char *lookup_bsearch __P((const char *, struct domainbinding *));
     65 static const char *lookup __P((const char *, struct domainbinding *));
     66 static const char *get_lang_env(const char *);
     67 
     68 /*
     69  * shortcut functions.  the main implementation resides in dcngettext().
     70  */
/*
 * gettext() -
 * look up msgid in the default domain (domainname == NULL) under the
 * LC_MESSAGES category.  singular form only: n is fixed to 1.
 */
char *
gettext(const char *msgid)
{
	return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
}
     78 
/*
 * dgettext() -
 * look up msgid in the given domain under the LC_MESSAGES category.
 * singular form only: n is fixed to 1.
 */
char *
dgettext(const char *domainname, const char *msgid)
{
	return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
}
     87 
/*
 * dcgettext() -
 * look up msgid in the given domain and locale category.
 * singular form only: n is fixed to 1.
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{
	return dcngettext(domainname, msgid, NULL, 1UL, category);
}
     97 
/*
 * ngettext() -
 * plural-aware lookup in the default domain (domainname == NULL) under
 * the LC_MESSAGES category; n selects between msgid1 and msgid2.
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{
	return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
}
    107 
/*
 * dngettext() -
 * plural-aware lookup in the given domain under the LC_MESSAGES
 * category; n selects between msgid1 and msgid2.
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{
	return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
}
    118 
    119 /*
    120  * dcngettext() -
    121  * lookup internationalized message on database locale/category/domainname
    122  * (like ja_JP.eucJP/LC_MESSAGES/domainname).
 * if n equals 1, internationalized message will be looked up for msgid1.
    124  * otherwise, message will be looked up for msgid2.
    125  * if the lookup fails, the function will return msgid1 or msgid2 as is.
    126  *
    127  * Even though the return type is "char *", caller should not rewrite the
    128  * region pointed to by the return value (should be "const char *", but can't
    129  * change it for compatibility with other implementations).
    130  *
    131  * by default (if domainname == NULL), domainname is taken from the value set
    132  * by textdomain().  usually name of the application (like "ls") is used as
    133  * domainname.  category is usually LC_MESSAGES.
    134  *
    135  * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
    136  * endian encoded file.  both endians are supported here, as the files are in
    137  * /usr/share/locale! (or we should move those files into /usr/libdata)
    138  */
    139 
/*
 * Translate an LC_* category constant into its textual name
 * (e.g. LC_MESSAGES -> "LC_MESSAGES").
 * Returns NULL for any value that is not one of the six categories in
 * the table below (LC_ALL included).
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int		id;
		const char	*name;
	} categories[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(categories) / sizeof(categories[0]); i++) {
		if (categories[i].id == category)
			return categories[i].name;
	}
	return NULL;
}
    155 
    156 /*
    157  * XPG syntax: language[_territory[.codeset]][@modifier]
    158  * XXX boundary check on "result" is lacking
    159  */
    160 static const char *
    161 split_locale(lname)
    162 	const char *lname;
    163 {
    164 	char buf[BUFSIZ], tmp[BUFSIZ];
    165 	char *l, *t, *c, *m;
    166 	static char result[BUFSIZ];
    167 
    168 	memset(result, 0, sizeof(result));
    169 
    170 	if (strlen(lname) + 1 > sizeof(buf)) {
    171 fail:
    172 		return lname;
    173 	}
    174 
    175 	strlcpy(buf, lname, sizeof(buf));
    176 	m = strrchr(buf, '@');
    177 	if (m)
    178 		*m++ = '\0';
    179 	c = strrchr(buf, '.');
    180 	if (c)
    181 		*c++ = '\0';
    182 	t = strrchr(buf, '_');
    183 	if (t)
    184 		*t++ = '\0';
    185 	l = buf;
    186 	if (strlen(l) == 0)
    187 		goto fail;
    188 	if (c && !t)
    189 		goto fail;
    190 
    191 	if (m) {
    192 		if (t) {
    193 			if (c) {
    194 				snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s",
    195 				    l, t, c, m);
    196 				strlcat(result, tmp, sizeof(result));
    197 				strlcat(result, ":", sizeof(result));
    198 			}
    199 			snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m);
    200 			strlcat(result, tmp, sizeof(result));
    201 			strlcat(result, ":", sizeof(result));
    202 		}
    203 		snprintf(tmp, sizeof(tmp), "%s@%s", l, m);
    204 		strlcat(result, tmp, sizeof(result));
    205 		strlcat(result, ":", sizeof(result));
    206 	}
    207 	if (t) {
    208 		if (c) {
    209 			snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c);
    210 			strlcat(result, tmp, sizeof(result));
    211 			strlcat(result, ":", sizeof(result));
    212 		}
    213 		snprintf(tmp, sizeof(tmp), "%s_%s", l, t);
    214 		strlcat(result, tmp, sizeof(result));
    215 		strlcat(result, ":", sizeof(result));
    216 	}
    217 	strlcat(result, l, sizeof(result));
    218 
    219 	return result;
    220 }
    221 
    222 static const char *
    223 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
    224 	char *buf;
    225 	size_t len;
    226 	const char *dir;
    227 	const char *lpath;	/* list of locales to be tried */
    228 	const char *category;
    229 	const char *domainname;
    230 	struct domainbinding *db;
    231 {
    232 	struct stat st;
    233 	char *p, *q;
    234 	char lpath_tmp[BUFSIZ];
    235 
    236 	strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp));
    237 	q = lpath_tmp;
    238 	/* CONSTCOND */
    239 	while (1) {
    240 		p = strsep(&q, ":");
    241 		if (!p)
    242 			break;
    243 		if (!*p)
    244 			continue;
    245 
    246 		/* don't mess with default locales */
    247 		if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
    248 			return NULL;
    249 
    250 		/* validate pathname */
    251 		if (strchr(p, '/') || strchr(category, '/'))
    252 			continue;
    253 #if 1	/*?*/
    254 		if (strchr(domainname, '/'))
    255 			continue;
    256 #endif
    257 
    258 		snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
    259 		    category, domainname);
    260 		if (stat(buf, &st) < 0)
    261 			continue;
    262 		if ((st.st_mode & S_IFMT) != S_IFREG)
    263 			continue;
    264 
    265 		if (mapit(buf, db) == 0)
    266 			return buf;
    267 	}
    268 
    269 	return NULL;
    270 }
    271 
    272 static u_int32_t
    273 flip(v, magic)
    274 	u_int32_t v;
    275 	u_int32_t magic;
    276 {
    277 
    278 	if (magic == MO_MAGIC)
    279 		return v;
    280 	else if (magic == MO_MAGIC_SWAPPED) {
    281 		v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
    282 		    ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
    283 		return v;
    284 	} else {
    285 		abort();
    286 		/*NOTREACHED*/
    287 	}
    288 }
    289 
    290 static int
    291 validate(arg, mohandle)
    292 	void *arg;
    293 	struct mohandle *mohandle;
    294 {
    295 	char *p;
    296 
    297 	p = (char *)arg;
    298 	if (p < (char *)mohandle->addr ||
    299 	    p > (char *)mohandle->addr + mohandle->len)
    300 		return 0;
    301 	else
    302 		return 1;
    303 }
    304 
    305 int
    306 mapit(path, db)
    307 	const char *path;
    308 	struct domainbinding *db;
    309 {
    310 	int fd;
    311 	struct stat st;
    312 	char *base;
    313 	u_int32_t magic, revision;
    314 	struct moentry *otable, *ttable;
    315 	struct moentry_h *p;
    316 	struct mo *mo;
    317 	size_t l;
    318 	int i;
    319 	char *v;
    320 	struct mohandle *mohandle = &db->mohandle;
    321 
    322 	if (mohandle->addr && mohandle->addr != MAP_FAILED &&
    323 	    mohandle->mo.mo_magic)
    324 		return 0;	/*already opened*/
    325 
    326 	unmapit(db);
    327 
    328 #if 0
    329 	if (secure_path(path) != 0)
    330 		goto fail;
    331 #endif
    332 	if (stat(path, &st) < 0)
    333 		goto fail;
    334 	if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
    335 		goto fail;
    336 	fd = open(path, O_RDONLY);
    337 	if (fd < 0)
    338 		goto fail;
    339 	if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
    340 	    (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
    341 		close(fd);
    342 		goto fail;
    343 	}
    344 	if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
    345 	    flip(revision, magic) != MO_REVISION) {
    346 		close(fd);
    347 		goto fail;
    348 	}
    349 	mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
    350 	    MAP_FILE | MAP_SHARED, fd, (off_t)0);
    351 	if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
    352 		close(fd);
    353 		goto fail;
    354 	}
    355 	close(fd);
    356 	mohandle->len = (size_t)st.st_size;
    357 
    358 	base = mohandle->addr;
    359 	mo = (struct mo *)mohandle->addr;
    360 
    361 	/* flip endian.  do not flip magic number! */
    362 	mohandle->mo.mo_magic = mo->mo_magic;
    363 	mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
    364 	mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
    365 
    366 	/* validate otable/ttable */
    367 	otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
    368 	ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
    369 	if (!validate(otable, mohandle) ||
    370 	    !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
    371 		unmapit(db);
    372 		goto fail;
    373 	}
    374 	if (!validate(ttable, mohandle) ||
    375 	    !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
    376 		unmapit(db);
    377 		goto fail;
    378 	}
    379 
    380 	/* allocate [ot]table, and convert to normal pointer representation. */
    381 	l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
    382 	mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
    383 	if (!mohandle->mo.mo_otable) {
    384 		unmapit(db);
    385 		goto fail;
    386 	}
    387 	mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
    388 	if (!mohandle->mo.mo_ttable) {
    389 		unmapit(db);
    390 		goto fail;
    391 	}
    392 	p = mohandle->mo.mo_otable;
    393 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
    394 		p[i].len = flip(otable[i].len, magic);
    395 		p[i].off = base + flip(otable[i].off, magic);
    396 
    397 		if (!validate(p[i].off, mohandle) ||
    398 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
    399 			unmapit(db);
    400 			goto fail;
    401 		}
    402 	}
    403 	p = mohandle->mo.mo_ttable;
    404 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
    405 		p[i].len = flip(ttable[i].len, magic);
    406 		p[i].off = base + flip(ttable[i].off, magic);
    407 
    408 		if (!validate(p[i].off, mohandle) ||
    409 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
    410 			unmapit(db);
    411 			goto fail;
    412 		}
    413 	}
    414 
    415 	/* grab MIME-header and charset field */
    416 	mohandle->mo.mo_header = lookup("", db);
    417 	if (mohandle->mo.mo_header)
    418 		v = strstr(mohandle->mo.mo_header, "charset=");
    419 	else
    420 		v = NULL;
    421 	if (v) {
    422 		mohandle->mo.mo_charset = strdup(v + 8);
    423 		if (!mohandle->mo.mo_charset)
    424 			goto fail;
    425 		v = strchr(mohandle->mo.mo_charset, '\n');
    426 		if (v)
    427 			*v = '\0';
    428 	}
    429 
    430 	/*
    431 	 * XXX check charset, reject it if we are unable to support the charset
    432 	 * with the current locale.
    433 	 * for example, if we are using euc-jp locale and we are looking at
    434 	 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
    435 	 * the *.mo file as we cannot support it.
    436 	 */
    437 
    438 	return 0;
    439 
    440 fail:
    441 	return -1;
    442 }
    443 
    444 static int
    445 unmapit(db)
    446 	struct domainbinding *db;
    447 {
    448 	struct mohandle *mohandle = &db->mohandle;
    449 
    450 	/* unmap if there's already mapped region */
    451 	if (mohandle->addr && mohandle->addr != MAP_FAILED)
    452 		munmap(mohandle->addr, mohandle->len);
    453 	mohandle->addr = NULL;
    454 	if (mohandle->mo.mo_otable)
    455 		free(mohandle->mo.mo_otable);
    456 	if (mohandle->mo.mo_ttable)
    457 		free(mohandle->mo.mo_ttable);
    458 	if (mohandle->mo.mo_charset)
    459 		free(mohandle->mo.mo_charset);
    460 	memset(&mohandle->mo, 0, sizeof(mohandle->mo));
    461 	return 0;
    462 }
    463 
/*
 * Placeholder for a lookup through the hash table of the *.mo file.
 * Not implemented: always returns NULL, so callers fall back to another
 * lookup method.  Both arguments are unused.
 */
/* ARGSUSED */
static const char *
lookup_hash(const char *msgid, struct domainbinding *db)
{
	/*
	 * XXX should try a hashed lookup here, but to do so, we need to
	 * look inside the GPL'ed *.c and re-implement...
	 */
	return NULL;
}
    477 
    478 static const char *
    479 lookup_bsearch(msgid, db)
    480 	const char *msgid;
    481 	struct domainbinding *db;
    482 {
    483 	int top, bottom, middle, omiddle;
    484 	int n;
    485 	struct mohandle *mohandle = &db->mohandle;
    486 
    487 	top = 0;
    488 	bottom = mohandle->mo.mo_nstring;
    489 	omiddle = -1;
    490 	/* CONSTCOND */
    491 	while (1) {
    492 		if (top > bottom)
    493 			break;
    494 		middle = (top + bottom) / 2;
    495 		/* avoid possible infinite loop, when the data is not sorted */
    496 		if (omiddle == middle)
    497 			break;
    498 		if (middle < 0 || middle >= mohandle->mo.mo_nstring)
    499 			break;
    500 
    501 		n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
    502 		if (n == 0)
    503 			return (const char *)mohandle->mo.mo_ttable[middle].off;
    504 		else if (n < 0)
    505 			bottom = middle;
    506 		else
    507 			top = middle;
    508 		omiddle = middle;
    509 	}
    510 
    511 	return NULL;
    512 }
    513 
/*
 * Find the translation of msgid in the catalog bound to db.
 * The hashed lookup is tried first; if it yields nothing the result of
 * the binary search is returned (NULL when msgid is not in the catalog).
 */
static const char *
lookup(const char *msgid, struct domainbinding *db)
{
	const char *hit = lookup_hash(msgid, db);

	return (hit != NULL) ? hit : lookup_bsearch(msgid, db);
}
    527 
/*
 * Work out the list of locales to search for the given category from the
 * environment.
 *
 * category_name is the textual category ("LC_MESSAGES", ...), which is
 * also the name of the per-category environment variable.
 *
 * Returns a colon-separated list of locale names, or NULL if no relevant
 * variable is set.  A variable that is set to the empty string counts as
 * unset.  The value of LANGUAGE is returned as is; anything else goes
 * through split_locale(), whose result lives in a static buffer.
 */
static const char *
get_lang_env(const char *category_name)
{
	const char *lang;

	/* 1. see LANGUAGE variable first; it already is a list of locales. */
	lang = getenv("LANGUAGE");
	if (lang != NULL && *lang != '\0')
		return lang;

	/*
	 * 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG, in this order:
	 * LC_ALL overrides the per-category variable, LANG is the fallback.
	 */
	lang = getenv("LC_ALL");
	if (lang == NULL || *lang == '\0')
		lang = getenv(category_name);
	if (lang == NULL || *lang == '\0')
		lang = getenv("LANG");

	if (lang == NULL || *lang == '\0')
		return NULL; /* error */

	return split_locale(lang);
}
    549 
    550 char *
    551 dcngettext(domainname, msgid1, msgid2, n, category)
    552 	const char *domainname;
    553 	const char *msgid1;
    554 	const char *msgid2;
    555 	unsigned long int n;
    556 	int category;
    557 {
    558 	const char *msgid;
    559 	char path[PATH_MAX];
    560 	const char *lpath;
    561 	static char olpath[PATH_MAX];
    562 	const char *cname = NULL;
    563 	const char *v;
    564 	static char *ocname = NULL;
    565 	static char *odomainname = NULL;
    566 	struct domainbinding *db;
    567 
    568 	msgid = (n == 1) ? msgid1 : msgid2;
    569 	if (msgid == NULL)
    570 		return NULL;
    571 
    572 	if (!domainname)
    573 		domainname = __current_domainname;
    574 	cname = lookup_category(category);
    575 	if (!domainname || !cname)
    576 		goto fail;
    577 
    578 	lpath = get_lang_env(cname);
    579 	if (!lpath)
    580 		goto fail;
    581 
    582 	for (db = __bindings; db; db = db->next)
    583 		if (strcmp(db->domainname, domainname) == 0)
    584 			break;
    585 	if (!db) {
    586 		if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
    587 			goto fail;
    588 		db = __bindings;
    589 	}
    590 
    591 	/* resolve relative path */
    592 	/* XXX not necessary? */
    593 	if (db->path[0] != '/') {
    594 		char buf[PATH_MAX];
    595 
    596 		if (getcwd(buf, sizeof(buf)) == 0)
    597 			goto fail;
    598 		if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
    599 			goto fail;
    600 		if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf))
    601 			goto fail;
    602 		strcpy(db->path, buf);
    603 	}
    604 
    605 	/* don't bother looking it up if the values are the same */
    606 	if (odomainname && strcmp(domainname, odomainname) == 0 &&
    607 	    ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
    608 	    db->mohandle.mo.mo_magic)
    609 		goto found;
    610 
    611 	/* try to find appropriate file, from $LANGUAGE */
    612 	if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
    613 	    domainname, db) == NULL)
    614 		goto fail;
    615 
    616 	if (odomainname)
    617 		free(odomainname);
    618 	if (ocname)
    619 		free(ocname);
    620 	odomainname = strdup(domainname);
    621 	ocname = strdup(cname);
    622 	if (!odomainname || !ocname) {
    623 		if (odomainname)
    624 			free(odomainname);
    625 		if (ocname)
    626 			free(ocname);
    627 		odomainname = ocname = NULL;
    628 	}
    629 	else
    630 		strlcpy(olpath, lpath, sizeof(olpath));
    631 
    632 found:
    633 	v = lookup(msgid, db);
    634 	if (v) {
    635 		/*
    636 		 * XXX call iconv() here, if translated text is encoded
    637 		 * differently from currently-selected encoding (locale).
    638 		 * look at Content-type header in *.mo file, in string obtained
    639 		 * by gettext("").
    640 		 */
    641 
    642 		/*
    643 		 * Given the amount of printf-format security issues, it may
    644 		 * be a good idea to validate if the original msgid and the
    645 		 * translated message format string carry the same printf-like
    646 		 * format identifiers.
    647 		 */
    648 
    649 		msgid = v;
    650 	}
    651 
    652 fail:
    653 	/* LINTED const cast */
    654 	return (char *)msgid;
    655 }
    656