1 1.12 christos /* $NetBSD: citrus_csmapper.c,v 1.12 2019/10/09 23:23:41 christos Exp $ */ 2 1.1 tshiozak 3 1.1 tshiozak /*- 4 1.1 tshiozak * Copyright (c)2003 Citrus Project, 5 1.1 tshiozak * All rights reserved. 6 1.1 tshiozak * 7 1.1 tshiozak * Redistribution and use in source and binary forms, with or without 8 1.1 tshiozak * modification, are permitted provided that the following conditions 9 1.1 tshiozak * are met: 10 1.1 tshiozak * 1. Redistributions of source code must retain the above copyright 11 1.1 tshiozak * notice, this list of conditions and the following disclaimer. 12 1.1 tshiozak * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 tshiozak * notice, this list of conditions and the following disclaimer in the 14 1.1 tshiozak * documentation and/or other materials provided with the distribution. 15 1.1 tshiozak * 16 1.1 tshiozak * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 1.1 tshiozak * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 1.1 tshiozak * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 1.1 tshiozak * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 1.1 tshiozak * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 1.1 tshiozak * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 1.1 tshiozak * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 1.1 tshiozak * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 1.1 tshiozak * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 1.1 tshiozak * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 1.1 tshiozak * SUCH DAMAGE. 27 1.1 tshiozak */ 28 1.1 tshiozak 29 1.1 tshiozak #include <sys/cdefs.h> 30 1.1 tshiozak #if defined(LIBC_SCCS) && !defined(lint) 31 1.12 christos __RCSID("$NetBSD: citrus_csmapper.c,v 1.12 2019/10/09 23:23:41 christos Exp $"); 32 1.1 tshiozak #endif /* LIBC_SCCS and not lint */ 33 1.1 tshiozak 34 1.1 tshiozak #include "namespace.h" 35 1.1 tshiozak #include "reentrant.h" 36 1.1 tshiozak #include <assert.h> 37 1.1 tshiozak #include <stdio.h> 38 1.1 tshiozak #include <stdlib.h> 39 1.1 tshiozak #include <string.h> 40 1.1 tshiozak #include <errno.h> 41 1.1 tshiozak #include <limits.h> 42 1.1 tshiozak #include <paths.h> 43 1.1 tshiozak #include <sys/types.h> 44 1.1 tshiozak #include <sys/queue.h> 45 1.1 tshiozak 46 1.1 tshiozak #include "citrus_namespace.h" 47 1.1 tshiozak #include "citrus_types.h" 48 1.1 tshiozak #include "citrus_bcs.h" 49 1.1 tshiozak #include "citrus_region.h" 50 1.1 tshiozak #include "citrus_memstream.h" 51 1.1 tshiozak #include "citrus_mmap.h" 52 1.1 tshiozak #include "citrus_module.h" 53 1.1 tshiozak #include "citrus_hash.h" 54 1.1 tshiozak #include "citrus_mapper.h" 55 1.1 tshiozak #include "citrus_csmapper.h" 56 1.1 tshiozak #include "citrus_pivot_file.h" 57 1.1 tshiozak #include "citrus_db.h" 58 1.1 tshiozak #include "citrus_db_hash.h" 59 1.1 tshiozak #include "citrus_lookup.h" 60 1.1 tshiozak 61 1.1 tshiozak #ifdef _REENTRANT 62 1.1 tshiozak static rwlock_t lock = RWLOCK_INITIALIZER; 63 1.1 tshiozak #endif 64 1.1 tshiozak static struct _citrus_mapper_area *maparea = NULL; 65 1.1 tshiozak 66 1.1 tshiozak #define CS_ALIAS _PATH_CSMAPPER "/charset.alias" 67 1.1 tshiozak #define CS_PIVOT _PATH_CSMAPPER "/charset.pivot" 68 1.1 tshiozak 69 1.1 tshiozak 70 1.1 tshiozak /* ---------------------------------------------------------------------- */ 71 1.1 tshiozak 72 1.1 tshiozak static int 73 1.8 junyoung get32(struct _region *r, uint32_t *rval) 74 1.1 tshiozak { 75 1.1 tshiozak if (_region_size(r) != 4) 76 1.1 tshiozak return EFTYPE; 77 1.1 tshiozak 78 1.5 christos memcpy(rval, _region_head(r), (size_t)4); 79 1.1 tshiozak *rval = be32toh(*rval); 80 1.1 tshiozak 81 1.1 tshiozak return 0; 82 1.1 tshiozak } 83 1.1 tshiozak 84 1.1 tshiozak static int 85 1.1 tshiozak open_subdb(struct _citrus_db **subdb, struct _citrus_db *db, const char *src) 86 1.1 tshiozak { 87 1.1 tshiozak int ret; 88 1.1 tshiozak struct _region r; 89 1.1 tshiozak 90 1.1 tshiozak ret = _db_lookup_by_s(db, src, &r, NULL); 91 1.1 tshiozak if (ret) 92 1.1 tshiozak return ret; 93 1.1 tshiozak ret = _db_open(subdb, &r, _CITRUS_PIVOT_SUB_MAGIC, _db_hash_std, NULL); 94 1.1 tshiozak if (ret) 95 1.1 tshiozak return ret; 96 1.1 tshiozak 97 1.1 tshiozak return 0; 98 1.1 tshiozak } 99 1.1 tshiozak 100 1.1 tshiozak 101 1.1 tshiozak #define NO_SUCH_FILE EOPNOTSUPP 102 1.1 tshiozak static int 103 1.1 tshiozak find_best_pivot_pvdb(const char *src, const char *dst, char *pivot, 104 1.1 tshiozak size_t pvlen, unsigned long *rnorm) 105 1.1 tshiozak { 106 1.1 tshiozak int ret, num, i; 107 1.1 tshiozak struct _region fr, r1, r2; 108 1.1 tshiozak struct _citrus_db *db1, *db2, *db3; 109 1.1 tshiozak char buf[LINE_MAX]; 110 1.1 tshiozak unsigned long norm; 111 1.8 junyoung uint32_t val32; 112 1.1 tshiozak 113 1.1 tshiozak ret = _map_file(&fr, CS_PIVOT ".pvdb"); 114 1.1 tshiozak if (ret) { 115 1.1 tshiozak if (ret == ENOENT) 116 1.1 tshiozak ret = NO_SUCH_FILE; 117 1.1 tshiozak return ret; 118 1.1 tshiozak } 119 1.1 tshiozak ret = _db_open(&db1, &fr, _CITRUS_PIVOT_MAGIC, _db_hash_std, NULL); 120 1.1 tshiozak if (ret) 121 1.1 tshiozak goto quit1; 122 1.1 tshiozak ret = open_subdb(&db2, db1, src); 123 1.1 tshiozak if (ret) 124 1.1 tshiozak goto quit2; 125 1.1 tshiozak 126 1.1 tshiozak num = _db_get_num_entries(db2); 127 1.1 tshiozak *rnorm = ULONG_MAX; 128 1.3 itojun for (i = 0; i < num; i++) { 129 1.1 tshiozak /* iterate each pivot */ 130 1.1 tshiozak ret = _db_get_entry(db2, i, &r1, &r2); 131 1.1 tshiozak if (ret) 132 1.1 tshiozak goto quit3; 133 1.1 tshiozak /* r1:pivot name, r2:norm among src and pivot */ 134 1.1 tshiozak ret = get32(&r2, &val32); 135 1.1 tshiozak if (ret) 136 1.1 tshiozak goto quit3; 137 1.1 tshiozak norm = val32; 138 1.1 tshiozak snprintf(buf, sizeof(buf), "%.*s", 139 1.1 tshiozak (int)_region_size(&r1), (char *)_region_head(&r1)); 140 1.1 tshiozak /* buf: pivot name */ 141 1.1 tshiozak ret = open_subdb(&db3, db1, buf); 142 1.1 tshiozak if (ret) 143 1.1 tshiozak goto quit3; 144 1.7 tnozaki if (_db_lookup_by_s(db3, dst, &r2, NULL) != 0) 145 1.11 tnozaki /* don't break the loop, test all src/dst pairs. */ 146 1.1 tshiozak goto quit4; 147 1.1 tshiozak /* r2: norm among pivot and dst */ 148 1.1 tshiozak ret = get32(&r2, &val32); 149 1.1 tshiozak if (ret) 150 1.1 tshiozak goto quit4; 151 1.1 tshiozak norm += val32; 152 1.1 tshiozak /* judge minimum norm */ 153 1.1 tshiozak if (norm < *rnorm) { 154 1.1 tshiozak *rnorm = norm; 155 1.1 tshiozak strlcpy(pivot, buf, pvlen); 156 1.1 tshiozak } 157 1.1 tshiozak quit4: 158 1.1 tshiozak _db_close(db3); 159 1.1 tshiozak if (ret) 160 1.1 tshiozak goto quit3; 161 1.1 tshiozak } 162 1.1 tshiozak quit3: 163 1.1 tshiozak _db_close(db2); 164 1.1 tshiozak quit2: 165 1.1 tshiozak _db_close(db1); 166 1.1 tshiozak quit1: 167 1.1 tshiozak _unmap_file(&fr); 168 1.1 tshiozak if (ret) 169 1.1 tshiozak return ret; 170 1.1 tshiozak 171 1.1 tshiozak if (*rnorm == ULONG_MAX) 172 1.1 tshiozak return ENOENT; 173 1.1 tshiozak 174 1.1 tshiozak return 0; 175 1.1 tshiozak } 176 1.1 tshiozak 177 1.1 tshiozak /* ---------------------------------------------------------------------- */ 178 1.1 tshiozak 179 1.1 tshiozak struct zone { 180 1.1 tshiozak const char *begin, *end; 181 1.1 tshiozak }; 182 1.1 tshiozak 183 1.1 tshiozak struct parse_arg { 184 1.1 tshiozak char dst[PATH_MAX]; 185 1.1 tshiozak unsigned long norm; 186 1.1 tshiozak }; 187 1.1 tshiozak 188 1.1 tshiozak static int 189 1.1 tshiozak parse_line(struct parse_arg *pa, struct _region *r) 190 1.1 tshiozak { 191 1.1 tshiozak char buf[20]; 192 1.1 tshiozak struct zone z1, z2; 193 1.1 tshiozak size_t len; 194 1.1 tshiozak 195 1.1 tshiozak len = _region_size(r); 196 1.1 tshiozak z1.begin = _bcs_skip_ws_len(_region_head(r), &len); 197 1.1 tshiozak if (len == 0) 198 1.1 tshiozak return EFTYPE; 199 1.1 tshiozak z1.end = _bcs_skip_nonws_len(z1.begin, &len); 200 1.1 tshiozak if (len == 0) 201 1.1 tshiozak return EFTYPE; 202 1.1 tshiozak z2.begin = _bcs_skip_ws_len(z1.end, &len); 203 1.1 tshiozak if (len == 0) 204 1.1 tshiozak return EFTYPE; 205 1.1 tshiozak z2.end = _bcs_skip_nonws_len(z2.begin, &len); 206 1.1 tshiozak 207 1.1 tshiozak /* z1 : dst name, z2 : norm */ 208 1.1 tshiozak snprintf(pa->dst, sizeof(pa->dst), 209 1.1 tshiozak "%.*s", (int)(z1.end-z1.begin), z1.begin); 210 1.1 tshiozak snprintf(buf, sizeof(buf), 211 1.1 tshiozak "%.*s", (int)(z2.end-z2.begin), z2.begin); 212 1.9 tnozaki pa->norm = _bcs_strtoul(buf, NULL, 0); 213 1.1 tshiozak 214 1.1 tshiozak return 0; 215 1.1 tshiozak } 216 1.1 tshiozak 217 1.1 tshiozak static int 218 1.1 tshiozak find_dst(struct parse_arg *pasrc, const char *dst) 219 1.1 tshiozak { 220 1.1 tshiozak int ret; 221 1.1 tshiozak struct parse_arg padst; 222 1.1 tshiozak struct _lookup *cl; 223 1.1 tshiozak struct _region data; 224 1.1 tshiozak 225 1.4 tshiozak ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 226 1.1 tshiozak if (ret) 227 1.1 tshiozak return ret; 228 1.1 tshiozak 229 1.1 tshiozak ret = _lookup_seq_lookup(cl, pasrc->dst, &data); 230 1.1 tshiozak while (ret == 0) { 231 1.1 tshiozak ret = parse_line(&padst, &data); 232 1.1 tshiozak if (ret) 233 1.1 tshiozak break; 234 1.1 tshiozak if (strcmp(dst, padst.dst) == 0) { 235 1.1 tshiozak pasrc->norm += padst.norm; 236 1.1 tshiozak break; 237 1.1 tshiozak } 238 1.1 tshiozak ret = _lookup_seq_next(cl, NULL, &data); 239 1.1 tshiozak } 240 1.1 tshiozak _lookup_seq_close(cl); 241 1.1 tshiozak 242 1.1 tshiozak return ret; 243 1.1 tshiozak } 244 1.1 tshiozak 245 1.1 tshiozak static int 246 1.1 tshiozak find_best_pivot_lookup(const char *src, const char *dst, char *pivot, 247 1.1 tshiozak size_t pvlen, unsigned long *rnorm) 248 1.1 tshiozak { 249 1.1 tshiozak int ret; 250 1.1 tshiozak struct _lookup *cl; 251 1.1 tshiozak struct _region data; 252 1.1 tshiozak struct parse_arg pa; 253 1.1 tshiozak unsigned long norm_min; 254 1.1 tshiozak char pivot_min[PATH_MAX]; 255 1.1 tshiozak 256 1.4 tshiozak ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 257 1.1 tshiozak if (ret) 258 1.1 tshiozak return ret; 259 1.1 tshiozak 260 1.1 tshiozak norm_min = ULONG_MAX; 261 1.1 tshiozak 262 1.1 tshiozak /* find pivot code */ 263 1.1 tshiozak ret = _lookup_seq_lookup(cl, src, &data); 264 1.1 tshiozak while (ret == 0) { 265 1.1 tshiozak ret = parse_line(&pa, &data); 266 1.1 tshiozak if (ret) 267 1.1 tshiozak break; 268 1.1 tshiozak ret = find_dst(&pa, dst); 269 1.1 tshiozak if (ret) 270 1.1 tshiozak break; 271 1.1 tshiozak if (pa.norm < norm_min) { 272 1.1 tshiozak norm_min = pa.norm; 273 1.2 itojun strlcpy(pivot_min, pa.dst, sizeof(pivot_min)); 274 1.1 tshiozak } 275 1.1 tshiozak ret = _lookup_seq_next(cl, NULL, &data); 276 1.1 tshiozak } 277 1.1 tshiozak _lookup_seq_close(cl); 278 1.1 tshiozak 279 1.1 tshiozak if (ret != ENOENT) 280 1.1 tshiozak return ret; 281 1.1 tshiozak if (norm_min == ULONG_MAX) 282 1.1 tshiozak return ENOENT; 283 1.1 tshiozak strlcpy(pivot, pivot_min, pvlen); 284 1.1 tshiozak if (rnorm) 285 1.1 tshiozak *rnorm = norm_min; 286 1.1 tshiozak 287 1.1 tshiozak return 0; 288 1.1 tshiozak } 289 1.1 tshiozak 290 1.1 tshiozak static int 291 1.1 tshiozak find_best_pivot(const char *src, const char *dst, char *pivot, size_t pvlen, 292 1.1 tshiozak unsigned long *rnorm) 293 1.1 tshiozak { 294 1.1 tshiozak int ret; 295 1.1 tshiozak 296 1.1 tshiozak ret = find_best_pivot_pvdb(src, dst, pivot, pvlen, rnorm); 297 1.1 tshiozak if (ret == NO_SUCH_FILE) 298 1.1 tshiozak ret = find_best_pivot_lookup(src, dst, pivot, pvlen, rnorm); 299 1.1 tshiozak 300 1.1 tshiozak return ret; 301 1.1 tshiozak } 302 1.1 tshiozak 303 1.1 tshiozak static __inline int 304 1.1 tshiozak open_serial_mapper(struct _citrus_mapper_area *__restrict ma, 305 1.1 tshiozak struct _citrus_mapper * __restrict * __restrict rcm, 306 1.1 tshiozak const char *src, const char *pivot, const char *dst) 307 1.1 tshiozak { 308 1.12 christos char buf[4 * PATH_MAX]; 309 1.1 tshiozak 310 1.1 tshiozak snprintf(buf, sizeof(buf), "%s/%s,%s/%s", src, pivot, pivot, dst); 311 1.1 tshiozak 312 1.1 tshiozak return _mapper_open_direct(ma, rcm, "mapper_serial", buf); 313 1.1 tshiozak } 314 1.1 tshiozak 315 1.1 tshiozak static struct _citrus_csmapper *csm_none = NULL; 316 1.1 tshiozak static int 317 1.1 tshiozak get_none(struct _citrus_mapper_area *__restrict ma, 318 1.1 tshiozak struct _citrus_csmapper *__restrict *__restrict rcsm) 319 1.1 tshiozak { 320 1.1 tshiozak int ret; 321 1.1 tshiozak 322 1.1 tshiozak rwlock_wrlock(&lock); 323 1.1 tshiozak if (csm_none) { 324 1.1 tshiozak *rcsm = csm_none; 325 1.1 tshiozak ret = 0; 326 1.1 tshiozak goto quit; 327 1.1 tshiozak } 328 1.1 tshiozak 329 1.1 tshiozak ret = _mapper_open_direct(ma, &csm_none, "mapper_none", ""); 330 1.1 tshiozak if (ret) 331 1.1 tshiozak goto quit; 332 1.1 tshiozak _mapper_set_persistent(csm_none); 333 1.1 tshiozak 334 1.1 tshiozak *rcsm = csm_none; 335 1.1 tshiozak ret = 0; 336 1.1 tshiozak quit: 337 1.1 tshiozak rwlock_unlock(&lock); 338 1.1 tshiozak return ret; 339 1.1 tshiozak } 340 1.1 tshiozak 341 1.1 tshiozak int 342 1.1 tshiozak _citrus_csmapper_open(struct _citrus_csmapper * __restrict * __restrict rcsm, 343 1.1 tshiozak const char * __restrict src, const char * __restrict dst, 344 1.8 junyoung uint32_t flags, unsigned long *rnorm) 345 1.1 tshiozak { 346 1.1 tshiozak int ret; 347 1.1 tshiozak char buf1[PATH_MAX], buf2[PATH_MAX], key[PATH_MAX], pivot[PATH_MAX]; 348 1.1 tshiozak const char *realsrc, *realdst; 349 1.1 tshiozak unsigned long norm; 350 1.1 tshiozak 351 1.6 mrg norm = 0; /* XXX gcc */ 352 1.6 mrg 353 1.1 tshiozak ret = _citrus_mapper_create_area(&maparea, _PATH_CSMAPPER); 354 1.1 tshiozak if (ret) 355 1.1 tshiozak return ret; 356 1.1 tshiozak 357 1.4 tshiozak realsrc = _lookup_alias(CS_ALIAS, src, buf1, sizeof(buf1), 358 1.4 tshiozak _LOOKUP_CASE_IGNORE); 359 1.4 tshiozak realdst = _lookup_alias(CS_ALIAS, dst, buf2, sizeof(buf2), 360 1.4 tshiozak _LOOKUP_CASE_IGNORE); 361 1.1 tshiozak if (!strcmp(realsrc, realdst)) { 362 1.1 tshiozak ret = get_none(maparea, rcsm); 363 1.1 tshiozak if (ret == 0 && rnorm != NULL) 364 1.1 tshiozak *rnorm = 0; 365 1.1 tshiozak return ret; 366 1.1 tshiozak } 367 1.1 tshiozak 368 1.1 tshiozak snprintf(key, sizeof(key), "%s/%s", realsrc, realdst); 369 1.1 tshiozak 370 1.1 tshiozak ret = _mapper_open(maparea, rcsm, key); 371 1.1 tshiozak if (ret == 0) { 372 1.1 tshiozak if (rnorm != NULL) 373 1.1 tshiozak *rnorm = 0; 374 1.1 tshiozak return 0; 375 1.1 tshiozak } 376 1.1 tshiozak if (ret != ENOENT || (flags & _CSMAPPER_F_PREVENT_PIVOT)!=0) 377 1.1 tshiozak return ret; 378 1.1 tshiozak 379 1.1 tshiozak ret = find_best_pivot(realsrc, realdst, pivot, sizeof(pivot), &norm); 380 1.1 tshiozak if (ret) 381 1.1 tshiozak return ret; 382 1.1 tshiozak 383 1.1 tshiozak ret = open_serial_mapper(maparea, rcsm, realsrc, pivot, realdst); 384 1.1 tshiozak if (ret == 0 && rnorm != NULL) 385 1.1 tshiozak *rnorm = norm; 386 1.1 tshiozak 387 1.1 tshiozak return ret; 388 1.1 tshiozak } 389