Home | History | Annotate | Line # | Download | only in libintl
gettext.c revision 1.9
      1 /*	$NetBSD: gettext.c,v 1.9 2001/02/16 07:20:35 minoura Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000, 2001 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #if defined(LIBC_SCCS) && !defined(lint)
     31 __RCSID("$NetBSD: gettext.c,v 1.9 2001/02/16 07:20:35 minoura Exp $");
     32 #endif /* LIBC_SCCS and not lint */
     33 
     34 #include <sys/types.h>
     35 #include <sys/param.h>
     36 #include <sys/stat.h>
     37 #include <sys/mman.h>
     38 #include <sys/uio.h>
     39 
     40 #include <fcntl.h>
     41 #include <stdio.h>
     42 #include <stdlib.h>
     43 #include <unistd.h>
     44 #include <string.h>
     45 #if 0
     46 #include <util.h>
     47 #endif
     48 #include <libintl.h>
     49 #include <locale.h>
     50 #include "libintl_local.h"
     51 #include "pathnames.h"
     52 
     53 static const char *lookup_category __P((int));
     54 static const char *split_locale __P((const char *));
     55 static const char *lookup_mofile __P((char *, size_t, const char *,
     56 	char *, const char *, const char *, struct domainbinding *));
     57 static u_int32_t flip __P((u_int32_t, u_int32_t));
     58 static int validate __P((void *, struct mohandle *));
     59 static int mapit __P((const char *, struct domainbinding *));
     60 static int unmapit __P((struct domainbinding *));
     61 static const char *lookup_hash __P((const char *, struct domainbinding *));
     62 static const char *lookup_bsearch __P((const char *, struct domainbinding *));
     63 static const char *lookup __P((const char *, struct domainbinding *));
     64 
     65 /*
     66  * shortcut functions.  the main implementation resides in dcngettext().
     67  */
/*
 * gettext() - translate msgid using the current default text domain.
 *
 * Thin wrapper: passes a NULL domain name (dcngettext() then falls back to
 * the current domain), no plural form, n = 1 and the LC_MESSAGES category.
 * Returns whatever dcngettext() returns.
 */
char *
gettext(msgid)
	const char *msgid;
{
	char *translation;

	translation = dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
	return translation;
}
     75 
/*
 * dgettext() - translate msgid using the given text domain.
 *
 * Thin wrapper around dcngettext(): no plural form, n = 1, and the
 * LC_MESSAGES category.  Returns whatever dcngettext() returns.
 */
char *
dgettext(domainname, msgid)
	const char *domainname;
	const char *msgid;
{
	char *translation;

	translation = dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
	return translation;
}
     84 
/*
 * dcgettext() - translate msgid using the given text domain and locale
 * category.
 *
 * Thin wrapper around dcngettext(): no plural form and n = 1.
 * Returns whatever dcngettext() returns.
 */
char *
dcgettext(domainname, msgid, category)
	const char *domainname;
	const char *msgid;
	int category;
{
	char *translation;

	translation = dcngettext(domainname, msgid, NULL, 1UL, category);
	return translation;
}
     94 
/*
 * ngettext() - translate msgid1 or msgid2, selected by n, using the
 * current default text domain.
 *
 * Thin wrapper around dcngettext(): NULL domain name (the current domain is
 * used instead) and the LC_MESSAGES category.  Returns whatever
 * dcngettext() returns.
 */
char *
ngettext(msgid1, msgid2, n)
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{
	char *translation;

	translation = dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
	return translation;
}
    104 
/*
 * dngettext() - translate msgid1 or msgid2, selected by n, using the given
 * text domain.
 *
 * Thin wrapper around dcngettext() with the LC_MESSAGES category.
 * Returns whatever dcngettext() returns.
 */
char *
dngettext(domainname, msgid1, msgid2, n)
	const char *domainname;
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{
	char *translation;

	translation = dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
	return translation;
}
    115 
    116 /*
    117  * dcngettext() -
    118  * lookup internationalized message on database locale/category/domainname
    119  * (like ja_JP.eucJP/LC_MESSAGES/domainname).
 * if n equals 1, the internationalized message will be looked up for msgid1.
 * otherwise, the message will be looked up for msgid2.
    122  * if the lookup fails, the function will return msgid1 or msgid2 as is.
    123  *
    124  * Even though the return type is "char *", caller should not rewrite the
    125  * region pointed to by the return value (should be "const char *", but can't
    126  * change it for compatibility with other implementations).
    127  *
    128  * by default (if domainname == NULL), domainname is taken from the value set
    129  * by textdomain().  usually name of the application (like "ls") is used as
    130  * domainname.  category is usually LC_MESSAGES.
    131  *
    132  * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
    133  * endian encoded file.  both endians are supported here, as the files are in
    134  * /usr/share/locale! (or we should move those files into /usr/libdata)
    135  */
    136 
/*
 * lookup_category() - return the name of a locale category as a string
 * (e.g. LC_MESSAGES -> "LC_MESSAGES").
 *
 * Only the six categories listed in the table are known; any other value
 * yields NULL.  The returned string is a constant and must not be freed.
 */
static const char *
lookup_category(category)
	int category;
{
	static const struct {
		int		id;
		const char	*name;
	} known[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (known[i].id == category)
			return known[i].name;
	}
	return NULL;
}
    152 
/*
 * split_locale_add() - append one candidate name to the colon-separated
 * list in "list" (a NUL-terminated string in a buffer of "len" bytes).
 *
 * A ":" separator is inserted unless the list is still empty.
 * Returns 0 on success.  If the candidate does not fit, the list is left
 * exactly as it was and -1 is returned.
 */
static int
split_locale_add(list, len, name)
	char *list;
	size_t len;
	const char *name;
{
	size_t used;
	int n;

	used = strlen(list);
	n = snprintf(list + used, len - used, "%s%s", used ? ":" : "", name);
	if (n < 0 || (size_t)n >= len - used) {
		list[used] = '\0';	/* drop the partial candidate */
		return -1;
	}
	return 0;
}

/*
 * split_locale() - expand a locale name into a colon-separated list of
 * candidate names, most specific first.
 *
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * For "l_t.c@m" the list is
 *	l_t.c@m : l_t@m : l@m : l_t.c : l_t : l
 * with the entries that need a missing component left out.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.  lname itself is returned unchanged when it cannot be parsed
 * (too long for the work buffer, empty language, or a codeset without a
 * territory) or when the expanded list would not fit in the result
 * buffer, so the caller never sees a candidate cut off in the middle.
 */
static const char *
split_locale(lname)
	const char *lname;
{
	char buf[BUFSIZ], tmp[BUFSIZ];
	char *l, *t, *c, *m;
	static char result[BUFSIZ];
	int failed = 0;

	result[0] = '\0';

	if (strlen(lname) + 1 > sizeof(buf))
		return lname;

	/* split a private copy from the right: @modifier, .codeset, _territory */
	snprintf(buf, sizeof(buf), "%s", lname);
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (strlen(l) == 0)
		return lname;
	if (c && !t)
		return lname;

	/*
	 * every candidate is a rearrangement of pieces of buf and is never
	 * longer than lname, so formatting into tmp cannot truncate.
	 */
	if (m) {
		if (t) {
			if (c) {
				snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s",
				    l, t, c, m);
				failed |= (split_locale_add(result,
				    sizeof(result), tmp) != 0);
			}
			snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m);
			failed |= (split_locale_add(result, sizeof(result),
			    tmp) != 0);
		}
		snprintf(tmp, sizeof(tmp), "%s@%s", l, m);
		failed |= (split_locale_add(result, sizeof(result), tmp) != 0);
	}
	if (t) {
		if (c) {
			snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c);
			failed |= (split_locale_add(result, sizeof(result),
			    tmp) != 0);
		}
		/* language_territory: must be formatted, not reused from tmp */
		snprintf(tmp, sizeof(tmp), "%s_%s", l, t);
		failed |= (split_locale_add(result, sizeof(result), tmp) != 0);
	}
	failed |= (split_locale_add(result, sizeof(result), l) != 0);

	if (failed)
		return lname;

	return result;
}
    217 
    218 static const char *
    219 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
    220 	char *buf;
    221 	size_t len;
    222 	const char *dir;
    223 	char *lpath;	/* list of locales to be tried */
    224 	const char *category;
    225 	const char *domainname;
    226 	struct domainbinding *db;
    227 {
    228 	struct stat st;
    229 	char *p, *q;
    230 
    231 	q = lpath;
    232 	/* CONSTCOND */
    233 	while (1) {
    234 		p = strsep(&q, ":");
    235 		if (!p)
    236 			break;
    237 		if (!*p)
    238 			continue;
    239 
    240 		/* don't mess with default locales */
    241 		if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
    242 			return NULL;
    243 
    244 		/* validate pathname */
    245 		if (strchr(p, '/') || strchr(category, '/'))
    246 			continue;
    247 #if 1	/*?*/
    248 		if (strchr(domainname, '/'))
    249 			continue;
    250 #endif
    251 
    252 		snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
    253 		    category, domainname);
    254 		if (stat(buf, &st) < 0)
    255 			continue;
    256 		if ((st.st_mode & S_IFMT) != S_IFREG)
    257 			continue;
    258 
    259 		if (mapit(buf, db) == 0)
    260 			return buf;
    261 	}
    262 
    263 	return NULL;
    264 }
    265 
    266 static u_int32_t
    267 flip(v, magic)
    268 	u_int32_t v;
    269 	u_int32_t magic;
    270 {
    271 
    272 	if (magic == MO_MAGIC)
    273 		return v;
    274 	else if (magic == MO_MAGIC_SWAPPED) {
    275 		v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
    276 		    ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
    277 		return v;
    278 	} else {
    279 		abort();
    280 		/*NOTREACHED*/
    281 	}
    282 }
    283 
    284 static int
    285 validate(arg, mohandle)
    286 	void *arg;
    287 	struct mohandle *mohandle;
    288 {
    289 	char *p;
    290 
    291 	p = (char *)arg;
    292 	if (p < (char *)mohandle->addr ||
    293 	    p > (char *)mohandle->addr + mohandle->len)
    294 		return 0;
    295 	else
    296 		return 1;
    297 }
    298 
    299 int
    300 mapit(path, db)
    301 	const char *path;
    302 	struct domainbinding *db;
    303 {
    304 	int fd;
    305 	struct stat st;
    306 	char *base;
    307 	u_int32_t magic, revision;
    308 	struct moentry *otable, *ttable;
    309 	struct moentry_h *p;
    310 	struct mo *mo;
    311 	size_t l;
    312 	int i;
    313 	char *v;
    314 	struct mohandle *mohandle = &db->mohandle;
    315 
    316 	if (mohandle->addr && mohandle->addr != MAP_FAILED &&
    317 	    mohandle->mo.mo_magic)
    318 		return 0;	/*already opened*/
    319 
    320 	unmapit(db);
    321 
    322 #if 0
    323 	if (secure_path(path) != 0)
    324 		goto fail;
    325 #endif
    326 	if (stat(path, &st) < 0)
    327 		goto fail;
    328 	if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
    329 		goto fail;
    330 	fd = open(path, O_RDONLY);
    331 	if (fd < 0)
    332 		goto fail;
    333 	if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
    334 	    (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
    335 		close(fd);
    336 		goto fail;
    337 	}
    338 	if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
    339 	    flip(revision, magic) != MO_REVISION) {
    340 		close(fd);
    341 		goto fail;
    342 	}
    343 	mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
    344 	    MAP_FILE | MAP_SHARED, fd, (off_t)0);
    345 	if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
    346 		close(fd);
    347 		goto fail;
    348 	}
    349 	close(fd);
    350 	mohandle->len = (size_t)st.st_size;
    351 
    352 	base = mohandle->addr;
    353 	mo = (struct mo *)mohandle->addr;
    354 
    355 	/* flip endian.  do not flip magic number! */
    356 	mohandle->mo.mo_magic = mo->mo_magic;
    357 	mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
    358 	mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
    359 
    360 	/* validate otable/ttable */
    361 	otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
    362 	ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
    363 	if (!validate(otable, mohandle) ||
    364 	    !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
    365 		unmapit(db);
    366 		goto fail;
    367 	}
    368 	if (!validate(ttable, mohandle) ||
    369 	    !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
    370 		unmapit(db);
    371 		goto fail;
    372 	}
    373 
    374 	/* allocate [ot]table, and convert to normal pointer representation. */
    375 	l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
    376 	mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
    377 	if (!mohandle->mo.mo_otable) {
    378 		unmapit(db);
    379 		goto fail;
    380 	}
    381 	mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
    382 	if (!mohandle->mo.mo_ttable) {
    383 		unmapit(db);
    384 		goto fail;
    385 	}
    386 	p = mohandle->mo.mo_otable;
    387 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
    388 		p[i].len = flip(otable[i].len, magic);
    389 		p[i].off = base + flip(otable[i].off, magic);
    390 
    391 		if (!validate(p[i].off, mohandle) ||
    392 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
    393 			unmapit(db);
    394 			goto fail;
    395 		}
    396 	}
    397 	p = mohandle->mo.mo_ttable;
    398 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
    399 		p[i].len = flip(ttable[i].len, magic);
    400 		p[i].off = base + flip(ttable[i].off, magic);
    401 
    402 		if (!validate(p[i].off, mohandle) ||
    403 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
    404 			unmapit(db);
    405 			goto fail;
    406 		}
    407 	}
    408 
    409 	/* grab MIME-header and charset field */
    410 	mohandle->mo.mo_header = lookup("", db);
    411 	if (mohandle->mo.mo_header)
    412 		v = strstr(mohandle->mo.mo_header, "charset=");
    413 	else
    414 		v = NULL;
    415 	if (v) {
    416 		mohandle->mo.mo_charset = strdup(v + 8);
    417 		if (!mohandle->mo.mo_charset)
    418 			goto fail;
    419 		v = strchr(mohandle->mo.mo_charset, '\n');
    420 		if (v)
    421 			*v = '\0';
    422 	}
    423 
    424 	/*
    425 	 * XXX check charset, reject it if we are unable to support the charset
    426 	 * with the current locale.
    427 	 * for example, if we are using euc-jp locale and we are looking at
    428 	 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
    429 	 * the *.mo file as we cannot support it.
    430 	 */
    431 
    432 	return 0;
    433 
    434 fail:
    435 	return -1;
    436 }
    437 
    438 static int
    439 unmapit(db)
    440 	struct domainbinding *db;
    441 {
    442 	struct mohandle *mohandle = &db->mohandle;
    443 
    444 	/* unmap if there's already mapped region */
    445 	if (mohandle->addr && mohandle->addr != MAP_FAILED)
    446 		munmap(mohandle->addr, mohandle->len);
    447 	mohandle->addr = NULL;
    448 	if (mohandle->mo.mo_otable)
    449 		free(mohandle->mo.mo_otable);
    450 	if (mohandle->mo.mo_ttable)
    451 		free(mohandle->mo.mo_ttable);
    452 	if (mohandle->mo.mo_charset)
    453 		free(mohandle->mo.mo_charset);
    454 	memset(&mohandle->mo, 0, sizeof(mohandle->mo));
    455 	return 0;
    456 }
    457 
/*
 * lookup_hash() - placeholder for a hash-table based lookup.
 *
 * Not implemented: both arguments are ignored and NULL ("not found") is
 * always returned, which makes lookup() fall back to the binary search.
 */
/* ARGSUSED */
static const char *
lookup_hash(msgid, db)
	const char *msgid;
	struct domainbinding *db;
{
	const char *hit = NULL;

	/*
	 * XXX should try a hashed lookup here, but to do so, we need to
	 * look inside the GPL'ed *.c and re-implement...
	 */
	return hit;
}
    471 
    472 static const char *
    473 lookup_bsearch(msgid, db)
    474 	const char *msgid;
    475 	struct domainbinding *db;
    476 {
    477 	int top, bottom, middle, omiddle;
    478 	int n;
    479 	struct mohandle *mohandle = &db->mohandle;
    480 
    481 	top = 0;
    482 	bottom = mohandle->mo.mo_nstring;
    483 	omiddle = -1;
    484 	/* CONSTCOND */
    485 	while (1) {
    486 		if (top > bottom)
    487 			break;
    488 		middle = (top + bottom) / 2;
    489 		/* avoid possible infinite loop, when the data is not sorted */
    490 		if (omiddle == middle)
    491 			break;
    492 		if (middle < 0 || middle >= mohandle->mo.mo_nstring)
    493 			break;
    494 
    495 		n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
    496 		if (n == 0)
    497 			return (const char *)mohandle->mo.mo_ttable[middle].off;
    498 		else if (n < 0)
    499 			bottom = middle;
    500 		else
    501 			top = middle;
    502 		omiddle = middle;
    503 	}
    504 
    505 	return NULL;
    506 }
    507 
/*
 * lookup() - find the translation of msgid in db's catalog.
 *
 * Tries lookup_hash() first and falls back to lookup_bsearch() when that
 * finds nothing.  Returns the translated string, or NULL if neither
 * lookup finds one.
 */
static const char *
lookup(msgid, db)
	const char *msgid;
	struct domainbinding *db;
{
	const char *hit = lookup_hash(msgid, db);

	return hit ? hit : lookup_bsearch(msgid, db);
}
    521 
    522 char *
    523 dcngettext(domainname, msgid1, msgid2, n, category)
    524 	const char *domainname;
    525 	const char *msgid1;
    526 	const char *msgid2;
    527 	unsigned long int n;
    528 	int category;
    529 {
    530 	const char *msgid;
    531 	char path[PATH_MAX];
    532 	static char lpath[PATH_MAX];
    533 	static char olpath[PATH_MAX];
    534 	const char *locale;
    535 	const char *language;
    536 	const char *cname = NULL;
    537 	const char *v;
    538 	static char *ocname = NULL;
    539 	static char *odomainname = NULL;
    540 	struct domainbinding *db;
    541 
    542 	msgid = (n == 1) ? msgid1 : msgid2;
    543 	if (msgid == NULL)
    544 		return NULL;
    545 
    546 	if (!domainname)
    547 		domainname = __current_domainname;
    548 	cname = lookup_category(category);
    549 	if (!domainname || !cname)
    550 		goto fail;
    551 
    552 	language = getenv("LANGUAGE");
    553 	locale = setlocale(LC_MESSAGES, NULL);	/*XXX*/
    554 	if (locale)
    555 		locale = split_locale(locale);
    556 	if (language && locale) {
    557 		if (strlen(language) + strlen(locale) + 2 > sizeof(lpath))
    558 			goto fail;
    559 		snprintf(lpath, sizeof(lpath), "%s:%s", language, locale);
    560 	} else if (language) {
    561 		if (strlen(language) + 1 > sizeof(lpath))
    562 			goto fail;
    563 		strlcpy(lpath, language, sizeof(lpath));
    564 	} else if (locale) {
    565 		if (strlen(locale) + 1 > sizeof(lpath))
    566 			goto fail;
    567 		strlcpy(lpath, locale, sizeof(lpath));
    568 	} else
    569 		goto fail;
    570 
    571 	for (db = __bindings; db; db = db->next)
    572 		if (strcmp(db->domainname, domainname) == 0)
    573 			break;
    574 	if (!db) {
    575 		if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
    576 			goto fail;
    577 		db = __bindings;
    578 	}
    579 
    580 	/* don't bother looking it up if the values are the same */
    581 	if (odomainname && strcmp(domainname, odomainname) == 0 &&
    582 	    ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
    583 	    db->mohandle.mo.mo_magic)
    584 		goto found;
    585 
    586 	/* try to find appropriate file, from $LANGUAGE */
    587 	if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
    588 	    domainname, db) == NULL)
    589 		goto fail;
    590 
    591 	if (odomainname)
    592 		free(odomainname);
    593 	if (ocname)
    594 		free(ocname);
    595 	odomainname = strdup(domainname);
    596 	ocname = strdup(cname);
    597 	if (!odomainname || !ocname) {
    598 		if (odomainname)
    599 			free(odomainname);
    600 		if (ocname)
    601 			free(ocname);
    602 		odomainname = ocname = NULL;
    603 		goto fail;
    604 	}
    605 
    606 	strlcpy(olpath, lpath, sizeof(olpath));
    607 
    608 found:
    609 	v = lookup(msgid, db);
    610 	if (v) {
    611 		/*
    612 		 * XXX call iconv() here, if translated text is encoded
    613 		 * differently from currently-selected encoding (locale).
    614 		 * look at Content-type header in *.mo file, in string obtained
    615 		 * by gettext("").
    616 		 */
    617 
    618 		/*
    619 		 * Given the amount of printf-format security issues, it may
    620 		 * be a good idea to validate if the original msgid and the
    621 		 * translated message format string carry the same printf-like
    622 		 * format identifiers.
    623 		 */
    624 
    625 		msgid = v;
    626 	}
    627 
    628 fail:
    629 	/* LINTED const cast */
    630 	return (char *)msgid;
    631 }
    632