Home | History | Annotate | Line # | Download | only in citrus
citrus_csmapper.c revision 1.10.8.1
      1  1.10.8.1      yamt /*	$NetBSD: citrus_csmapper.c,v 1.10.8.1 2012/04/17 00:05:16 yamt Exp $	*/
      2       1.1  tshiozak 
      3       1.1  tshiozak /*-
      4       1.1  tshiozak  * Copyright (c)2003 Citrus Project,
      5       1.1  tshiozak  * All rights reserved.
      6       1.1  tshiozak  *
      7       1.1  tshiozak  * Redistribution and use in source and binary forms, with or without
      8       1.1  tshiozak  * modification, are permitted provided that the following conditions
      9       1.1  tshiozak  * are met:
     10       1.1  tshiozak  * 1. Redistributions of source code must retain the above copyright
     11       1.1  tshiozak  *    notice, this list of conditions and the following disclaimer.
     12       1.1  tshiozak  * 2. Redistributions in binary form must reproduce the above copyright
     13       1.1  tshiozak  *    notice, this list of conditions and the following disclaimer in the
     14       1.1  tshiozak  *    documentation and/or other materials provided with the distribution.
     15       1.1  tshiozak  *
     16       1.1  tshiozak  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17       1.1  tshiozak  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18       1.1  tshiozak  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19       1.1  tshiozak  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20       1.1  tshiozak  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21       1.1  tshiozak  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22       1.1  tshiozak  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23       1.1  tshiozak  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24       1.1  tshiozak  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25       1.1  tshiozak  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26       1.1  tshiozak  * SUCH DAMAGE.
     27       1.1  tshiozak  */
     28       1.1  tshiozak 
     29       1.1  tshiozak #include <sys/cdefs.h>
     30       1.1  tshiozak #if defined(LIBC_SCCS) && !defined(lint)
     31  1.10.8.1      yamt __RCSID("$NetBSD: citrus_csmapper.c,v 1.10.8.1 2012/04/17 00:05:16 yamt Exp $");
     32       1.1  tshiozak #endif /* LIBC_SCCS and not lint */
     33       1.1  tshiozak 
     34       1.1  tshiozak #include "namespace.h"
     35       1.1  tshiozak #include "reentrant.h"
     36       1.1  tshiozak #include <assert.h>
     37       1.1  tshiozak #include <stdio.h>
     38       1.1  tshiozak #include <stdlib.h>
     39       1.1  tshiozak #include <string.h>
     40       1.1  tshiozak #include <errno.h>
     41       1.1  tshiozak #include <limits.h>
     42       1.1  tshiozak #include <paths.h>
     43       1.1  tshiozak #include <sys/types.h>
     44       1.1  tshiozak #include <sys/queue.h>
     45       1.1  tshiozak 
     46       1.1  tshiozak #include "citrus_namespace.h"
     47       1.1  tshiozak #include "citrus_types.h"
     48       1.1  tshiozak #include "citrus_bcs.h"
     49       1.1  tshiozak #include "citrus_region.h"
     50       1.1  tshiozak #include "citrus_memstream.h"
     51       1.1  tshiozak #include "citrus_mmap.h"
     52       1.1  tshiozak #include "citrus_module.h"
     53       1.1  tshiozak #include "citrus_hash.h"
     54       1.1  tshiozak #include "citrus_mapper.h"
     55       1.1  tshiozak #include "citrus_csmapper.h"
     56       1.1  tshiozak #include "citrus_pivot_file.h"
     57       1.1  tshiozak #include "citrus_db.h"
     58       1.1  tshiozak #include "citrus_db_hash.h"
     59       1.1  tshiozak #include "citrus_lookup.h"
     60       1.1  tshiozak 
     61       1.1  tshiozak #ifdef _REENTRANT
     62       1.1  tshiozak static rwlock_t lock = RWLOCK_INITIALIZER;
     63       1.1  tshiozak #endif
     64       1.1  tshiozak static struct _citrus_mapper_area *maparea = NULL;
     65       1.1  tshiozak 
     66       1.1  tshiozak #define CS_ALIAS	_PATH_CSMAPPER "/charset.alias"
     67       1.1  tshiozak #define CS_PIVOT	_PATH_CSMAPPER "/charset.pivot"
     68       1.1  tshiozak 
     69       1.1  tshiozak 
     70       1.1  tshiozak /* ---------------------------------------------------------------------- */
     71       1.1  tshiozak 
     72       1.1  tshiozak static int
     73       1.8  junyoung get32(struct _region *r, uint32_t *rval)
     74       1.1  tshiozak {
     75       1.1  tshiozak 	if (_region_size(r) != 4)
     76       1.1  tshiozak 		return EFTYPE;
     77       1.1  tshiozak 
     78       1.5  christos 	memcpy(rval, _region_head(r), (size_t)4);
     79       1.1  tshiozak 	*rval = be32toh(*rval);
     80       1.1  tshiozak 
     81       1.1  tshiozak 	return 0;
     82       1.1  tshiozak }
     83       1.1  tshiozak 
     84       1.1  tshiozak static int
     85       1.1  tshiozak open_subdb(struct _citrus_db **subdb, struct _citrus_db *db, const char *src)
     86       1.1  tshiozak {
     87       1.1  tshiozak 	int ret;
     88       1.1  tshiozak 	struct _region r;
     89       1.1  tshiozak 
     90       1.1  tshiozak 	ret = _db_lookup_by_s(db, src, &r, NULL);
     91       1.1  tshiozak 	if (ret)
     92       1.1  tshiozak 		return ret;
     93       1.1  tshiozak 	ret = _db_open(subdb, &r, _CITRUS_PIVOT_SUB_MAGIC, _db_hash_std, NULL);
     94       1.1  tshiozak 	if (ret)
     95       1.1  tshiozak 		return ret;
     96       1.1  tshiozak 
     97       1.1  tshiozak 	return 0;
     98       1.1  tshiozak }
     99       1.1  tshiozak 
    100       1.1  tshiozak 
    101       1.1  tshiozak #define NO_SUCH_FILE	EOPNOTSUPP
    102       1.1  tshiozak static int
    103       1.1  tshiozak find_best_pivot_pvdb(const char *src, const char *dst, char *pivot,
    104       1.1  tshiozak 		     size_t pvlen, unsigned long *rnorm)
    105       1.1  tshiozak {
    106       1.1  tshiozak 	int ret, num, i;
    107       1.1  tshiozak 	struct _region fr, r1, r2;
    108       1.1  tshiozak 	struct _citrus_db *db1, *db2, *db3;
    109       1.1  tshiozak 	char buf[LINE_MAX];
    110       1.1  tshiozak 	unsigned long norm;
    111       1.8  junyoung 	uint32_t val32;
    112       1.1  tshiozak 
    113       1.1  tshiozak 	ret = _map_file(&fr, CS_PIVOT ".pvdb");
    114       1.1  tshiozak 	if (ret) {
    115       1.1  tshiozak 		if (ret == ENOENT)
    116       1.1  tshiozak 			ret = NO_SUCH_FILE;
    117       1.1  tshiozak 		return ret;
    118       1.1  tshiozak 	}
    119       1.1  tshiozak 	ret = _db_open(&db1, &fr, _CITRUS_PIVOT_MAGIC, _db_hash_std, NULL);
    120       1.1  tshiozak 	if (ret)
    121       1.1  tshiozak 		goto quit1;
    122       1.1  tshiozak 	ret = open_subdb(&db2, db1, src);
    123       1.1  tshiozak 	if (ret)
    124       1.1  tshiozak 		goto quit2;
    125       1.1  tshiozak 
    126       1.1  tshiozak 	num = _db_get_num_entries(db2);
    127       1.1  tshiozak 	*rnorm = ULONG_MAX;
    128       1.3    itojun 	for (i = 0; i < num; i++) {
    129       1.1  tshiozak 		/* iterate each pivot */
    130       1.1  tshiozak 		ret = _db_get_entry(db2, i, &r1, &r2);
    131       1.1  tshiozak 		if (ret)
    132       1.1  tshiozak 			goto quit3;
    133       1.1  tshiozak 		/* r1:pivot name, r2:norm among src and pivot */
    134       1.1  tshiozak 		ret = get32(&r2, &val32);
    135       1.1  tshiozak 		if (ret)
    136       1.1  tshiozak 			goto quit3;
    137       1.1  tshiozak 		norm = val32;
    138       1.1  tshiozak 		snprintf(buf, sizeof(buf), "%.*s",
    139       1.1  tshiozak 			 (int)_region_size(&r1), (char *)_region_head(&r1));
    140       1.1  tshiozak 		/* buf: pivot name */
    141       1.1  tshiozak 		ret = open_subdb(&db3, db1, buf);
    142       1.1  tshiozak 		if (ret)
    143       1.1  tshiozak 			goto quit3;
    144       1.7   tnozaki 		if (_db_lookup_by_s(db3, dst, &r2, NULL) != 0)
    145  1.10.8.1      yamt 			/* don't break the loop, test all src/dst pairs. */
    146       1.1  tshiozak 			goto quit4;
    147       1.1  tshiozak 		/* r2: norm among pivot and dst */
    148       1.1  tshiozak 		ret = get32(&r2, &val32);
    149       1.1  tshiozak 		if (ret)
    150       1.1  tshiozak 			goto quit4;
    151       1.1  tshiozak 		norm += val32;
    152       1.1  tshiozak 		/* judge minimum norm */
    153       1.1  tshiozak 		if (norm < *rnorm) {
    154       1.1  tshiozak 			*rnorm = norm;
    155       1.1  tshiozak 			strlcpy(pivot, buf, pvlen);
    156       1.1  tshiozak 		}
    157       1.1  tshiozak quit4:
    158       1.1  tshiozak 		_db_close(db3);
    159       1.1  tshiozak 		if (ret)
    160       1.1  tshiozak 			goto quit3;
    161       1.1  tshiozak 	}
    162       1.1  tshiozak quit3:
    163       1.1  tshiozak 	_db_close(db2);
    164       1.1  tshiozak quit2:
    165       1.1  tshiozak 	_db_close(db1);
    166       1.1  tshiozak quit1:
    167       1.1  tshiozak 	_unmap_file(&fr);
    168       1.1  tshiozak 	if (ret)
    169       1.1  tshiozak 		return ret;
    170       1.1  tshiozak 
    171       1.1  tshiozak 	if (*rnorm == ULONG_MAX)
    172       1.1  tshiozak 		return ENOENT;
    173       1.1  tshiozak 
    174       1.1  tshiozak 	return 0;
    175       1.1  tshiozak }
    176       1.1  tshiozak 
    177       1.1  tshiozak /* ---------------------------------------------------------------------- */
    178       1.1  tshiozak 
/* A span of text inside a larger buffer, delimited by two pointers. */
struct zone {
	const char *begin;	/* first byte of the span */
	const char *end;	/* end pointer; span length is end - begin */
};
    182       1.1  tshiozak 
    183       1.1  tshiozak struct parse_arg {
    184       1.1  tshiozak 	char dst[PATH_MAX];
    185       1.1  tshiozak 	unsigned long norm;
    186       1.1  tshiozak };
    187       1.1  tshiozak 
    188       1.1  tshiozak static int
    189       1.1  tshiozak parse_line(struct parse_arg *pa, struct _region *r)
    190       1.1  tshiozak {
    191       1.1  tshiozak 	char buf[20];
    192       1.1  tshiozak 	struct zone z1, z2;
    193       1.1  tshiozak 	size_t len;
    194       1.1  tshiozak 
    195       1.1  tshiozak 	len = _region_size(r);
    196       1.1  tshiozak 	z1.begin = _bcs_skip_ws_len(_region_head(r), &len);
    197       1.1  tshiozak 	if (len == 0)
    198       1.1  tshiozak 		return EFTYPE;
    199       1.1  tshiozak 	z1.end = _bcs_skip_nonws_len(z1.begin, &len);
    200       1.1  tshiozak 	if (len == 0)
    201       1.1  tshiozak 		return EFTYPE;
    202       1.1  tshiozak 	z2.begin = _bcs_skip_ws_len(z1.end, &len);
    203       1.1  tshiozak 	if (len == 0)
    204       1.1  tshiozak 		return EFTYPE;
    205       1.1  tshiozak 	z2.end = _bcs_skip_nonws_len(z2.begin, &len);
    206       1.1  tshiozak 
    207       1.1  tshiozak 	/* z1 : dst name, z2 : norm */
    208       1.1  tshiozak 	snprintf(pa->dst, sizeof(pa->dst),
    209       1.1  tshiozak 		 "%.*s", (int)(z1.end-z1.begin), z1.begin);
    210       1.1  tshiozak 	snprintf(buf, sizeof(buf),
    211       1.1  tshiozak 		 "%.*s", (int)(z2.end-z2.begin), z2.begin);
    212       1.9   tnozaki 	pa->norm = _bcs_strtoul(buf, NULL, 0);
    213       1.1  tshiozak 
    214       1.1  tshiozak 	return 0;
    215       1.1  tshiozak }
    216       1.1  tshiozak 
    217       1.1  tshiozak static int
    218       1.1  tshiozak find_dst(struct parse_arg *pasrc, const char *dst)
    219       1.1  tshiozak {
    220       1.1  tshiozak 	int ret;
    221       1.1  tshiozak 	struct parse_arg padst;
    222       1.1  tshiozak 	struct _lookup *cl;
    223       1.1  tshiozak 	struct _region data;
    224       1.1  tshiozak 
    225       1.4  tshiozak 	ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE);
    226       1.1  tshiozak 	if (ret)
    227       1.1  tshiozak 		return ret;
    228       1.1  tshiozak 
    229       1.1  tshiozak 	ret = _lookup_seq_lookup(cl, pasrc->dst, &data);
    230       1.1  tshiozak 	while (ret == 0) {
    231       1.1  tshiozak 		ret = parse_line(&padst, &data);
    232       1.1  tshiozak 		if (ret)
    233       1.1  tshiozak 			break;
    234       1.1  tshiozak 		if (strcmp(dst, padst.dst) == 0) {
    235       1.1  tshiozak 			pasrc->norm += padst.norm;
    236       1.1  tshiozak 			break;
    237       1.1  tshiozak 		}
    238       1.1  tshiozak 		ret = _lookup_seq_next(cl, NULL, &data);
    239       1.1  tshiozak 	}
    240       1.1  tshiozak 	_lookup_seq_close(cl);
    241       1.1  tshiozak 
    242       1.1  tshiozak 	return ret;
    243       1.1  tshiozak }
    244       1.1  tshiozak 
    245       1.1  tshiozak static int
    246       1.1  tshiozak find_best_pivot_lookup(const char *src, const char *dst, char *pivot,
    247       1.1  tshiozak 		       size_t pvlen, unsigned long *rnorm)
    248       1.1  tshiozak {
    249       1.1  tshiozak 	int ret;
    250       1.1  tshiozak 	struct _lookup *cl;
    251       1.1  tshiozak 	struct _region data;
    252       1.1  tshiozak 	struct parse_arg pa;
    253       1.1  tshiozak 	unsigned long norm_min;
    254       1.1  tshiozak 	char pivot_min[PATH_MAX];
    255       1.1  tshiozak 
    256       1.4  tshiozak 	ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE);
    257       1.1  tshiozak 	if (ret)
    258       1.1  tshiozak 		return ret;
    259       1.1  tshiozak 
    260       1.1  tshiozak 	norm_min = ULONG_MAX;
    261       1.1  tshiozak 
    262       1.1  tshiozak 	/* find pivot code */
    263       1.1  tshiozak 	ret = _lookup_seq_lookup(cl, src, &data);
    264       1.1  tshiozak 	while (ret == 0) {
    265       1.1  tshiozak 		ret = parse_line(&pa, &data);
    266       1.1  tshiozak 		if (ret)
    267       1.1  tshiozak 			break;
    268       1.1  tshiozak 		ret = find_dst(&pa, dst);
    269       1.1  tshiozak 		if (ret)
    270       1.1  tshiozak 			break;
    271       1.1  tshiozak 		if (pa.norm < norm_min) {
    272       1.1  tshiozak 			norm_min = pa.norm;
    273       1.2    itojun 			strlcpy(pivot_min, pa.dst, sizeof(pivot_min));
    274       1.1  tshiozak 		}
    275       1.1  tshiozak 		ret = _lookup_seq_next(cl, NULL, &data);
    276       1.1  tshiozak 	}
    277       1.1  tshiozak 	_lookup_seq_close(cl);
    278       1.1  tshiozak 
    279       1.1  tshiozak 	if (ret != ENOENT)
    280       1.1  tshiozak 		return ret;
    281       1.1  tshiozak 	if (norm_min == ULONG_MAX)
    282       1.1  tshiozak 		return ENOENT;
    283       1.1  tshiozak 	strlcpy(pivot, pivot_min, pvlen);
    284       1.1  tshiozak 	if (rnorm)
    285       1.1  tshiozak 		*rnorm = norm_min;
    286       1.1  tshiozak 
    287       1.1  tshiozak 	return 0;
    288       1.1  tshiozak }
    289       1.1  tshiozak 
    290       1.1  tshiozak static int
    291       1.1  tshiozak find_best_pivot(const char *src, const char *dst, char *pivot, size_t pvlen,
    292       1.1  tshiozak 		unsigned long *rnorm)
    293       1.1  tshiozak {
    294       1.1  tshiozak 	int ret;
    295       1.1  tshiozak 
    296       1.1  tshiozak 	ret = find_best_pivot_pvdb(src, dst, pivot, pvlen, rnorm);
    297       1.1  tshiozak 	if (ret == NO_SUCH_FILE)
    298       1.1  tshiozak 		ret = find_best_pivot_lookup(src, dst, pivot, pvlen, rnorm);
    299       1.1  tshiozak 
    300       1.1  tshiozak 	return ret;
    301       1.1  tshiozak }
    302       1.1  tshiozak 
    303       1.1  tshiozak static __inline int
    304       1.1  tshiozak open_serial_mapper(struct _citrus_mapper_area *__restrict ma,
    305       1.1  tshiozak 		   struct _citrus_mapper * __restrict * __restrict rcm,
    306       1.1  tshiozak 		   const char *src, const char *pivot, const char *dst)
    307       1.1  tshiozak {
    308       1.1  tshiozak 	char buf[PATH_MAX];
    309       1.1  tshiozak 
    310       1.1  tshiozak 	snprintf(buf, sizeof(buf), "%s/%s,%s/%s", src, pivot, pivot, dst);
    311       1.1  tshiozak 
    312       1.1  tshiozak 	return _mapper_open_direct(ma, rcm, "mapper_serial", buf);
    313       1.1  tshiozak }
    314       1.1  tshiozak 
    315       1.1  tshiozak static struct _citrus_csmapper *csm_none = NULL;
    316       1.1  tshiozak static int
    317       1.1  tshiozak get_none(struct _citrus_mapper_area *__restrict ma,
    318       1.1  tshiozak 	 struct _citrus_csmapper *__restrict *__restrict rcsm)
    319       1.1  tshiozak {
    320       1.1  tshiozak 	int ret;
    321       1.1  tshiozak 
    322       1.1  tshiozak 	rwlock_wrlock(&lock);
    323       1.1  tshiozak 	if (csm_none) {
    324       1.1  tshiozak 		*rcsm = csm_none;
    325       1.1  tshiozak 		ret = 0;
    326       1.1  tshiozak 		goto quit;
    327       1.1  tshiozak 	}
    328       1.1  tshiozak 
    329       1.1  tshiozak 	ret = _mapper_open_direct(ma, &csm_none, "mapper_none", "");
    330       1.1  tshiozak 	if (ret)
    331       1.1  tshiozak 		goto quit;
    332       1.1  tshiozak 	_mapper_set_persistent(csm_none);
    333       1.1  tshiozak 
    334       1.1  tshiozak 	*rcsm = csm_none;
    335       1.1  tshiozak 	ret = 0;
    336       1.1  tshiozak quit:
    337       1.1  tshiozak 	rwlock_unlock(&lock);
    338       1.1  tshiozak 	return ret;
    339       1.1  tshiozak }
    340       1.1  tshiozak 
    341       1.1  tshiozak int
    342       1.1  tshiozak _citrus_csmapper_open(struct _citrus_csmapper * __restrict * __restrict rcsm,
    343       1.1  tshiozak 		      const char * __restrict src, const char * __restrict dst,
    344       1.8  junyoung 		      uint32_t flags, unsigned long *rnorm)
    345       1.1  tshiozak {
    346       1.1  tshiozak 	int ret;
    347       1.1  tshiozak 	char buf1[PATH_MAX], buf2[PATH_MAX], key[PATH_MAX], pivot[PATH_MAX];
    348       1.1  tshiozak 	const char *realsrc, *realdst;
    349       1.1  tshiozak 	unsigned long norm;
    350       1.1  tshiozak 
    351       1.6       mrg 	norm = 0;	/* XXX gcc */
    352       1.6       mrg 
    353       1.1  tshiozak 	ret = _citrus_mapper_create_area(&maparea, _PATH_CSMAPPER);
    354       1.1  tshiozak 	if (ret)
    355       1.1  tshiozak 		return ret;
    356       1.1  tshiozak 
    357       1.4  tshiozak 	realsrc = _lookup_alias(CS_ALIAS, src, buf1, sizeof(buf1),
    358       1.4  tshiozak 				_LOOKUP_CASE_IGNORE);
    359       1.4  tshiozak 	realdst = _lookup_alias(CS_ALIAS, dst, buf2, sizeof(buf2),
    360       1.4  tshiozak 				_LOOKUP_CASE_IGNORE);
    361       1.1  tshiozak 	if (!strcmp(realsrc, realdst)) {
    362       1.1  tshiozak 		ret = get_none(maparea, rcsm);
    363       1.1  tshiozak 		if (ret == 0 && rnorm != NULL)
    364       1.1  tshiozak 			*rnorm = 0;
    365       1.1  tshiozak 		return ret;
    366       1.1  tshiozak 	}
    367       1.1  tshiozak 
    368       1.1  tshiozak 	snprintf(key, sizeof(key), "%s/%s", realsrc, realdst);
    369       1.1  tshiozak 
    370       1.1  tshiozak 	ret = _mapper_open(maparea, rcsm, key);
    371       1.1  tshiozak 	if (ret == 0) {
    372       1.1  tshiozak 		if (rnorm != NULL)
    373       1.1  tshiozak 			*rnorm = 0;
    374       1.1  tshiozak 		return 0;
    375       1.1  tshiozak 	}
    376       1.1  tshiozak 	if (ret != ENOENT || (flags & _CSMAPPER_F_PREVENT_PIVOT)!=0)
    377       1.1  tshiozak 		return ret;
    378       1.1  tshiozak 
    379       1.1  tshiozak 	ret = find_best_pivot(realsrc, realdst, pivot, sizeof(pivot), &norm);
    380       1.1  tshiozak 	if (ret)
    381       1.1  tshiozak 		return ret;
    382       1.1  tshiozak 
    383       1.1  tshiozak 	ret = open_serial_mapper(maparea, rcsm, realsrc, pivot, realdst);
    384       1.1  tshiozak 	if (ret == 0 && rnorm != NULL)
    385       1.1  tshiozak 		*rnorm = norm;
    386       1.1  tshiozak 
    387       1.1  tshiozak 	return ret;
    388       1.1  tshiozak }
    389