Home | History | Annotate | Line # | Download | only in citrus
      1  1.12  christos /*	$NetBSD: citrus_csmapper.c,v 1.12 2019/10/09 23:23:41 christos Exp $	*/
      2   1.1  tshiozak 
      3   1.1  tshiozak /*-
      4   1.1  tshiozak  * Copyright (c)2003 Citrus Project,
      5   1.1  tshiozak  * All rights reserved.
      6   1.1  tshiozak  *
      7   1.1  tshiozak  * Redistribution and use in source and binary forms, with or without
      8   1.1  tshiozak  * modification, are permitted provided that the following conditions
      9   1.1  tshiozak  * are met:
     10   1.1  tshiozak  * 1. Redistributions of source code must retain the above copyright
     11   1.1  tshiozak  *    notice, this list of conditions and the following disclaimer.
     12   1.1  tshiozak  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.1  tshiozak  *    notice, this list of conditions and the following disclaimer in the
     14   1.1  tshiozak  *    documentation and/or other materials provided with the distribution.
     15   1.1  tshiozak  *
     16   1.1  tshiozak  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17   1.1  tshiozak  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18   1.1  tshiozak  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19   1.1  tshiozak  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20   1.1  tshiozak  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21   1.1  tshiozak  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22   1.1  tshiozak  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23   1.1  tshiozak  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24   1.1  tshiozak  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25   1.1  tshiozak  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26   1.1  tshiozak  * SUCH DAMAGE.
     27   1.1  tshiozak  */
     28   1.1  tshiozak 
     29   1.1  tshiozak #include <sys/cdefs.h>
     30   1.1  tshiozak #if defined(LIBC_SCCS) && !defined(lint)
     31  1.12  christos __RCSID("$NetBSD: citrus_csmapper.c,v 1.12 2019/10/09 23:23:41 christos Exp $");
     32   1.1  tshiozak #endif /* LIBC_SCCS and not lint */
     33   1.1  tshiozak 
     34   1.1  tshiozak #include "namespace.h"
     35   1.1  tshiozak #include "reentrant.h"
     36   1.1  tshiozak #include <assert.h>
     37   1.1  tshiozak #include <stdio.h>
     38   1.1  tshiozak #include <stdlib.h>
     39   1.1  tshiozak #include <string.h>
     40   1.1  tshiozak #include <errno.h>
     41   1.1  tshiozak #include <limits.h>
     42   1.1  tshiozak #include <paths.h>
     43   1.1  tshiozak #include <sys/types.h>
     44   1.1  tshiozak #include <sys/queue.h>
     45   1.1  tshiozak 
     46   1.1  tshiozak #include "citrus_namespace.h"
     47   1.1  tshiozak #include "citrus_types.h"
     48   1.1  tshiozak #include "citrus_bcs.h"
     49   1.1  tshiozak #include "citrus_region.h"
     50   1.1  tshiozak #include "citrus_memstream.h"
     51   1.1  tshiozak #include "citrus_mmap.h"
     52   1.1  tshiozak #include "citrus_module.h"
     53   1.1  tshiozak #include "citrus_hash.h"
     54   1.1  tshiozak #include "citrus_mapper.h"
     55   1.1  tshiozak #include "citrus_csmapper.h"
     56   1.1  tshiozak #include "citrus_pivot_file.h"
     57   1.1  tshiozak #include "citrus_db.h"
     58   1.1  tshiozak #include "citrus_db_hash.h"
     59   1.1  tshiozak #include "citrus_lookup.h"
     60   1.1  tshiozak 
     61   1.1  tshiozak #ifdef _REENTRANT
     62   1.1  tshiozak static rwlock_t lock = RWLOCK_INITIALIZER;
     63   1.1  tshiozak #endif
     64   1.1  tshiozak static struct _citrus_mapper_area *maparea = NULL;
     65   1.1  tshiozak 
     66   1.1  tshiozak #define CS_ALIAS	_PATH_CSMAPPER "/charset.alias"
     67   1.1  tshiozak #define CS_PIVOT	_PATH_CSMAPPER "/charset.pivot"
     68   1.1  tshiozak 
     69   1.1  tshiozak 
     70   1.1  tshiozak /* ---------------------------------------------------------------------- */
     71   1.1  tshiozak 
     72   1.1  tshiozak static int
     73   1.8  junyoung get32(struct _region *r, uint32_t *rval)
     74   1.1  tshiozak {
     75   1.1  tshiozak 	if (_region_size(r) != 4)
     76   1.1  tshiozak 		return EFTYPE;
     77   1.1  tshiozak 
     78   1.5  christos 	memcpy(rval, _region_head(r), (size_t)4);
     79   1.1  tshiozak 	*rval = be32toh(*rval);
     80   1.1  tshiozak 
     81   1.1  tshiozak 	return 0;
     82   1.1  tshiozak }
     83   1.1  tshiozak 
     84   1.1  tshiozak static int
     85   1.1  tshiozak open_subdb(struct _citrus_db **subdb, struct _citrus_db *db, const char *src)
     86   1.1  tshiozak {
     87   1.1  tshiozak 	int ret;
     88   1.1  tshiozak 	struct _region r;
     89   1.1  tshiozak 
     90   1.1  tshiozak 	ret = _db_lookup_by_s(db, src, &r, NULL);
     91   1.1  tshiozak 	if (ret)
     92   1.1  tshiozak 		return ret;
     93   1.1  tshiozak 	ret = _db_open(subdb, &r, _CITRUS_PIVOT_SUB_MAGIC, _db_hash_std, NULL);
     94   1.1  tshiozak 	if (ret)
     95   1.1  tshiozak 		return ret;
     96   1.1  tshiozak 
     97   1.1  tshiozak 	return 0;
     98   1.1  tshiozak }
     99   1.1  tshiozak 
    100   1.1  tshiozak 
    101   1.1  tshiozak #define NO_SUCH_FILE	EOPNOTSUPP
    102   1.1  tshiozak static int
    103   1.1  tshiozak find_best_pivot_pvdb(const char *src, const char *dst, char *pivot,
    104   1.1  tshiozak 		     size_t pvlen, unsigned long *rnorm)
    105   1.1  tshiozak {
    106   1.1  tshiozak 	int ret, num, i;
    107   1.1  tshiozak 	struct _region fr, r1, r2;
    108   1.1  tshiozak 	struct _citrus_db *db1, *db2, *db3;
    109   1.1  tshiozak 	char buf[LINE_MAX];
    110   1.1  tshiozak 	unsigned long norm;
    111   1.8  junyoung 	uint32_t val32;
    112   1.1  tshiozak 
    113   1.1  tshiozak 	ret = _map_file(&fr, CS_PIVOT ".pvdb");
    114   1.1  tshiozak 	if (ret) {
    115   1.1  tshiozak 		if (ret == ENOENT)
    116   1.1  tshiozak 			ret = NO_SUCH_FILE;
    117   1.1  tshiozak 		return ret;
    118   1.1  tshiozak 	}
    119   1.1  tshiozak 	ret = _db_open(&db1, &fr, _CITRUS_PIVOT_MAGIC, _db_hash_std, NULL);
    120   1.1  tshiozak 	if (ret)
    121   1.1  tshiozak 		goto quit1;
    122   1.1  tshiozak 	ret = open_subdb(&db2, db1, src);
    123   1.1  tshiozak 	if (ret)
    124   1.1  tshiozak 		goto quit2;
    125   1.1  tshiozak 
    126   1.1  tshiozak 	num = _db_get_num_entries(db2);
    127   1.1  tshiozak 	*rnorm = ULONG_MAX;
    128   1.3    itojun 	for (i = 0; i < num; i++) {
    129   1.1  tshiozak 		/* iterate each pivot */
    130   1.1  tshiozak 		ret = _db_get_entry(db2, i, &r1, &r2);
    131   1.1  tshiozak 		if (ret)
    132   1.1  tshiozak 			goto quit3;
    133   1.1  tshiozak 		/* r1:pivot name, r2:norm among src and pivot */
    134   1.1  tshiozak 		ret = get32(&r2, &val32);
    135   1.1  tshiozak 		if (ret)
    136   1.1  tshiozak 			goto quit3;
    137   1.1  tshiozak 		norm = val32;
    138   1.1  tshiozak 		snprintf(buf, sizeof(buf), "%.*s",
    139   1.1  tshiozak 			 (int)_region_size(&r1), (char *)_region_head(&r1));
    140   1.1  tshiozak 		/* buf: pivot name */
    141   1.1  tshiozak 		ret = open_subdb(&db3, db1, buf);
    142   1.1  tshiozak 		if (ret)
    143   1.1  tshiozak 			goto quit3;
    144   1.7   tnozaki 		if (_db_lookup_by_s(db3, dst, &r2, NULL) != 0)
    145  1.11   tnozaki 			/* don't break the loop, test all src/dst pairs. */
    146   1.1  tshiozak 			goto quit4;
    147   1.1  tshiozak 		/* r2: norm among pivot and dst */
    148   1.1  tshiozak 		ret = get32(&r2, &val32);
    149   1.1  tshiozak 		if (ret)
    150   1.1  tshiozak 			goto quit4;
    151   1.1  tshiozak 		norm += val32;
    152   1.1  tshiozak 		/* judge minimum norm */
    153   1.1  tshiozak 		if (norm < *rnorm) {
    154   1.1  tshiozak 			*rnorm = norm;
    155   1.1  tshiozak 			strlcpy(pivot, buf, pvlen);
    156   1.1  tshiozak 		}
    157   1.1  tshiozak quit4:
    158   1.1  tshiozak 		_db_close(db3);
    159   1.1  tshiozak 		if (ret)
    160   1.1  tshiozak 			goto quit3;
    161   1.1  tshiozak 	}
    162   1.1  tshiozak quit3:
    163   1.1  tshiozak 	_db_close(db2);
    164   1.1  tshiozak quit2:
    165   1.1  tshiozak 	_db_close(db1);
    166   1.1  tshiozak quit1:
    167   1.1  tshiozak 	_unmap_file(&fr);
    168   1.1  tshiozak 	if (ret)
    169   1.1  tshiozak 		return ret;
    170   1.1  tshiozak 
    171   1.1  tshiozak 	if (*rnorm == ULONG_MAX)
    172   1.1  tshiozak 		return ENOENT;
    173   1.1  tshiozak 
    174   1.1  tshiozak 	return 0;
    175   1.1  tshiozak }
    176   1.1  tshiozak 
    177   1.1  tshiozak /* ---------------------------------------------------------------------- */
    178   1.1  tshiozak 
    179   1.1  tshiozak struct zone {
    180   1.1  tshiozak 	const char *begin, *end;
    181   1.1  tshiozak };
    182   1.1  tshiozak 
    183   1.1  tshiozak struct parse_arg {
    184   1.1  tshiozak 	char dst[PATH_MAX];
    185   1.1  tshiozak 	unsigned long norm;
    186   1.1  tshiozak };
    187   1.1  tshiozak 
    188   1.1  tshiozak static int
    189   1.1  tshiozak parse_line(struct parse_arg *pa, struct _region *r)
    190   1.1  tshiozak {
    191   1.1  tshiozak 	char buf[20];
    192   1.1  tshiozak 	struct zone z1, z2;
    193   1.1  tshiozak 	size_t len;
    194   1.1  tshiozak 
    195   1.1  tshiozak 	len = _region_size(r);
    196   1.1  tshiozak 	z1.begin = _bcs_skip_ws_len(_region_head(r), &len);
    197   1.1  tshiozak 	if (len == 0)
    198   1.1  tshiozak 		return EFTYPE;
    199   1.1  tshiozak 	z1.end = _bcs_skip_nonws_len(z1.begin, &len);
    200   1.1  tshiozak 	if (len == 0)
    201   1.1  tshiozak 		return EFTYPE;
    202   1.1  tshiozak 	z2.begin = _bcs_skip_ws_len(z1.end, &len);
    203   1.1  tshiozak 	if (len == 0)
    204   1.1  tshiozak 		return EFTYPE;
    205   1.1  tshiozak 	z2.end = _bcs_skip_nonws_len(z2.begin, &len);
    206   1.1  tshiozak 
    207   1.1  tshiozak 	/* z1 : dst name, z2 : norm */
    208   1.1  tshiozak 	snprintf(pa->dst, sizeof(pa->dst),
    209   1.1  tshiozak 		 "%.*s", (int)(z1.end-z1.begin), z1.begin);
    210   1.1  tshiozak 	snprintf(buf, sizeof(buf),
    211   1.1  tshiozak 		 "%.*s", (int)(z2.end-z2.begin), z2.begin);
    212   1.9   tnozaki 	pa->norm = _bcs_strtoul(buf, NULL, 0);
    213   1.1  tshiozak 
    214   1.1  tshiozak 	return 0;
    215   1.1  tshiozak }
    216   1.1  tshiozak 
    217   1.1  tshiozak static int
    218   1.1  tshiozak find_dst(struct parse_arg *pasrc, const char *dst)
    219   1.1  tshiozak {
    220   1.1  tshiozak 	int ret;
    221   1.1  tshiozak 	struct parse_arg padst;
    222   1.1  tshiozak 	struct _lookup *cl;
    223   1.1  tshiozak 	struct _region data;
    224   1.1  tshiozak 
    225   1.4  tshiozak 	ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE);
    226   1.1  tshiozak 	if (ret)
    227   1.1  tshiozak 		return ret;
    228   1.1  tshiozak 
    229   1.1  tshiozak 	ret = _lookup_seq_lookup(cl, pasrc->dst, &data);
    230   1.1  tshiozak 	while (ret == 0) {
    231   1.1  tshiozak 		ret = parse_line(&padst, &data);
    232   1.1  tshiozak 		if (ret)
    233   1.1  tshiozak 			break;
    234   1.1  tshiozak 		if (strcmp(dst, padst.dst) == 0) {
    235   1.1  tshiozak 			pasrc->norm += padst.norm;
    236   1.1  tshiozak 			break;
    237   1.1  tshiozak 		}
    238   1.1  tshiozak 		ret = _lookup_seq_next(cl, NULL, &data);
    239   1.1  tshiozak 	}
    240   1.1  tshiozak 	_lookup_seq_close(cl);
    241   1.1  tshiozak 
    242   1.1  tshiozak 	return ret;
    243   1.1  tshiozak }
    244   1.1  tshiozak 
    245   1.1  tshiozak static int
    246   1.1  tshiozak find_best_pivot_lookup(const char *src, const char *dst, char *pivot,
    247   1.1  tshiozak 		       size_t pvlen, unsigned long *rnorm)
    248   1.1  tshiozak {
    249   1.1  tshiozak 	int ret;
    250   1.1  tshiozak 	struct _lookup *cl;
    251   1.1  tshiozak 	struct _region data;
    252   1.1  tshiozak 	struct parse_arg pa;
    253   1.1  tshiozak 	unsigned long norm_min;
    254   1.1  tshiozak 	char pivot_min[PATH_MAX];
    255   1.1  tshiozak 
    256   1.4  tshiozak 	ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE);
    257   1.1  tshiozak 	if (ret)
    258   1.1  tshiozak 		return ret;
    259   1.1  tshiozak 
    260   1.1  tshiozak 	norm_min = ULONG_MAX;
    261   1.1  tshiozak 
    262   1.1  tshiozak 	/* find pivot code */
    263   1.1  tshiozak 	ret = _lookup_seq_lookup(cl, src, &data);
    264   1.1  tshiozak 	while (ret == 0) {
    265   1.1  tshiozak 		ret = parse_line(&pa, &data);
    266   1.1  tshiozak 		if (ret)
    267   1.1  tshiozak 			break;
    268   1.1  tshiozak 		ret = find_dst(&pa, dst);
    269   1.1  tshiozak 		if (ret)
    270   1.1  tshiozak 			break;
    271   1.1  tshiozak 		if (pa.norm < norm_min) {
    272   1.1  tshiozak 			norm_min = pa.norm;
    273   1.2    itojun 			strlcpy(pivot_min, pa.dst, sizeof(pivot_min));
    274   1.1  tshiozak 		}
    275   1.1  tshiozak 		ret = _lookup_seq_next(cl, NULL, &data);
    276   1.1  tshiozak 	}
    277   1.1  tshiozak 	_lookup_seq_close(cl);
    278   1.1  tshiozak 
    279   1.1  tshiozak 	if (ret != ENOENT)
    280   1.1  tshiozak 		return ret;
    281   1.1  tshiozak 	if (norm_min == ULONG_MAX)
    282   1.1  tshiozak 		return ENOENT;
    283   1.1  tshiozak 	strlcpy(pivot, pivot_min, pvlen);
    284   1.1  tshiozak 	if (rnorm)
    285   1.1  tshiozak 		*rnorm = norm_min;
    286   1.1  tshiozak 
    287   1.1  tshiozak 	return 0;
    288   1.1  tshiozak }
    289   1.1  tshiozak 
    290   1.1  tshiozak static int
    291   1.1  tshiozak find_best_pivot(const char *src, const char *dst, char *pivot, size_t pvlen,
    292   1.1  tshiozak 		unsigned long *rnorm)
    293   1.1  tshiozak {
    294   1.1  tshiozak 	int ret;
    295   1.1  tshiozak 
    296   1.1  tshiozak 	ret = find_best_pivot_pvdb(src, dst, pivot, pvlen, rnorm);
    297   1.1  tshiozak 	if (ret == NO_SUCH_FILE)
    298   1.1  tshiozak 		ret = find_best_pivot_lookup(src, dst, pivot, pvlen, rnorm);
    299   1.1  tshiozak 
    300   1.1  tshiozak 	return ret;
    301   1.1  tshiozak }
    302   1.1  tshiozak 
    303   1.1  tshiozak static __inline int
    304   1.1  tshiozak open_serial_mapper(struct _citrus_mapper_area *__restrict ma,
    305   1.1  tshiozak 		   struct _citrus_mapper * __restrict * __restrict rcm,
    306   1.1  tshiozak 		   const char *src, const char *pivot, const char *dst)
    307   1.1  tshiozak {
    308  1.12  christos 	char buf[4 * PATH_MAX];
    309   1.1  tshiozak 
    310   1.1  tshiozak 	snprintf(buf, sizeof(buf), "%s/%s,%s/%s", src, pivot, pivot, dst);
    311   1.1  tshiozak 
    312   1.1  tshiozak 	return _mapper_open_direct(ma, rcm, "mapper_serial", buf);
    313   1.1  tshiozak }
    314   1.1  tshiozak 
    315   1.1  tshiozak static struct _citrus_csmapper *csm_none = NULL;
    316   1.1  tshiozak static int
    317   1.1  tshiozak get_none(struct _citrus_mapper_area *__restrict ma,
    318   1.1  tshiozak 	 struct _citrus_csmapper *__restrict *__restrict rcsm)
    319   1.1  tshiozak {
    320   1.1  tshiozak 	int ret;
    321   1.1  tshiozak 
    322   1.1  tshiozak 	rwlock_wrlock(&lock);
    323   1.1  tshiozak 	if (csm_none) {
    324   1.1  tshiozak 		*rcsm = csm_none;
    325   1.1  tshiozak 		ret = 0;
    326   1.1  tshiozak 		goto quit;
    327   1.1  tshiozak 	}
    328   1.1  tshiozak 
    329   1.1  tshiozak 	ret = _mapper_open_direct(ma, &csm_none, "mapper_none", "");
    330   1.1  tshiozak 	if (ret)
    331   1.1  tshiozak 		goto quit;
    332   1.1  tshiozak 	_mapper_set_persistent(csm_none);
    333   1.1  tshiozak 
    334   1.1  tshiozak 	*rcsm = csm_none;
    335   1.1  tshiozak 	ret = 0;
    336   1.1  tshiozak quit:
    337   1.1  tshiozak 	rwlock_unlock(&lock);
    338   1.1  tshiozak 	return ret;
    339   1.1  tshiozak }
    340   1.1  tshiozak 
    341   1.1  tshiozak int
    342   1.1  tshiozak _citrus_csmapper_open(struct _citrus_csmapper * __restrict * __restrict rcsm,
    343   1.1  tshiozak 		      const char * __restrict src, const char * __restrict dst,
    344   1.8  junyoung 		      uint32_t flags, unsigned long *rnorm)
    345   1.1  tshiozak {
    346   1.1  tshiozak 	int ret;
    347   1.1  tshiozak 	char buf1[PATH_MAX], buf2[PATH_MAX], key[PATH_MAX], pivot[PATH_MAX];
    348   1.1  tshiozak 	const char *realsrc, *realdst;
    349   1.1  tshiozak 	unsigned long norm;
    350   1.1  tshiozak 
    351   1.6       mrg 	norm = 0;	/* XXX gcc */
    352   1.6       mrg 
    353   1.1  tshiozak 	ret = _citrus_mapper_create_area(&maparea, _PATH_CSMAPPER);
    354   1.1  tshiozak 	if (ret)
    355   1.1  tshiozak 		return ret;
    356   1.1  tshiozak 
    357   1.4  tshiozak 	realsrc = _lookup_alias(CS_ALIAS, src, buf1, sizeof(buf1),
    358   1.4  tshiozak 				_LOOKUP_CASE_IGNORE);
    359   1.4  tshiozak 	realdst = _lookup_alias(CS_ALIAS, dst, buf2, sizeof(buf2),
    360   1.4  tshiozak 				_LOOKUP_CASE_IGNORE);
    361   1.1  tshiozak 	if (!strcmp(realsrc, realdst)) {
    362   1.1  tshiozak 		ret = get_none(maparea, rcsm);
    363   1.1  tshiozak 		if (ret == 0 && rnorm != NULL)
    364   1.1  tshiozak 			*rnorm = 0;
    365   1.1  tshiozak 		return ret;
    366   1.1  tshiozak 	}
    367   1.1  tshiozak 
    368   1.1  tshiozak 	snprintf(key, sizeof(key), "%s/%s", realsrc, realdst);
    369   1.1  tshiozak 
    370   1.1  tshiozak 	ret = _mapper_open(maparea, rcsm, key);
    371   1.1  tshiozak 	if (ret == 0) {
    372   1.1  tshiozak 		if (rnorm != NULL)
    373   1.1  tshiozak 			*rnorm = 0;
    374   1.1  tshiozak 		return 0;
    375   1.1  tshiozak 	}
    376   1.1  tshiozak 	if (ret != ENOENT || (flags & _CSMAPPER_F_PREVENT_PIVOT)!=0)
    377   1.1  tshiozak 		return ret;
    378   1.1  tshiozak 
    379   1.1  tshiozak 	ret = find_best_pivot(realsrc, realdst, pivot, sizeof(pivot), &norm);
    380   1.1  tshiozak 	if (ret)
    381   1.1  tshiozak 		return ret;
    382   1.1  tshiozak 
    383   1.1  tshiozak 	ret = open_serial_mapper(maparea, rcsm, realsrc, pivot, realdst);
    384   1.1  tshiozak 	if (ret == 0 && rnorm != NULL)
    385   1.1  tshiozak 		*rnorm = norm;
    386   1.1  tshiozak 
    387   1.1  tshiozak 	return ret;
    388   1.1  tshiozak }
    389