Home | History | Annotate | Line # | Download | only in cd9660
cd9660_util.c revision 1.8
      1 /*	$NetBSD: cd9660_util.c,v 1.8 2006/12/08 22:04:56 enami Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley
      8  * by Pace Willisson (pace (at) blitz.com).  The Rock Ridge Extension
      9  * Support code is derived from software contributed to Berkeley
     10  * by Atsushi Murai (amurai (at) spec.co.jp).
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	@(#)cd9660_util.c	8.3 (Berkeley) 12/5/94
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: cd9660_util.c,v 1.8 2006/12/08 22:04:56 enami Exp $");
     41 
     42 #include <sys/param.h>
     43 #include <sys/systm.h>
     44 #include <sys/namei.h>
     45 #include <sys/resourcevar.h>
     46 #include <sys/kernel.h>
     47 #include <sys/file.h>
     48 #include <sys/stat.h>
     49 #include <sys/buf.h>
     50 #include <sys/proc.h>
     51 #include <sys/mount.h>
     52 #include <sys/vnode.h>
     53 #include <sys/malloc.h>
     54 #include <sys/dirent.h>
     55 
     56 #include <fs/cd9660/iso.h>
     57 #include <fs/cd9660/cd9660_extern.h>
     58 
     59 #include <fs/unicode.h>
     60 
/*
 * Fetch the next character of a caller-supplied file name.  The raw
 * 8-bit path advances the string pointer by one and decrements the
 * remaining size; the Joliet/UTF-8 path delegates to wget_utf8().
 */
static u_int16_t wget(const u_char **, size_t *, int);
/*
 * Store one character into an output name buffer and return the number
 * of bytes written (isofntrans() treats 0 as "no room left").
 */
static int wput(u_char *, size_t, u_int16_t, int);

/*
 * Non-zero: Joliet names are handled as 16-bit characters and converted
 * through the UTF-8 helpers.  Zero: Joliet characters whose high byte is
 * non-zero are replaced with '?' and names are treated as raw 8-bit.
 */
int cd9660_utf8_joliet = 1;
     65 
     66 /*
     67  * Get one character out of an iso filename
     68  * Return number of bytes consumed
     69  */
     70 int
     71 isochar(isofn, isoend, joliet_level, c)
     72 	const u_char *isofn;
     73 	const u_char *isoend;
     74 	int joliet_level;
     75 	u_int16_t *c;
     76 {
     77 	*c = isofn[0];
     78 	if (joliet_level == 0 || isofn + 1 == isoend) {
     79 		/* (00) and (01) are one byte in Joliet, too */
     80 		return 1;
     81 	}
     82 
     83 	if (cd9660_utf8_joliet) {
     84 		*c = (*c << 8) + isofn[1];
     85 	} else {
     86 		/* characters outside ISO-8859-1 subset replaced with '?' */
     87 		if (*c != 0)
     88 			*c = '?';
     89 		else
     90 			*c = isofn[1];
     91 	}
     92 
     93 	return 2;
     94 }
     95 
     96 /*
     97  * translate and compare a filename
     98  * Note: Version number plus ';' may be omitted.
     99  */
    100 int
    101 isofncmp(fn, fnlen, isofn, isolen, joliet_level)
    102 	const u_char *fn, *isofn;
    103 	size_t fnlen, isolen;
    104 	int joliet_level;
    105 {
    106 	int i, j;
    107 	u_int16_t fc, ic;
    108 	const u_char *isoend = isofn + isolen;
    109 
    110 	while (fnlen > 0) {
    111 		fc = wget(&fn, &fnlen, joliet_level);
    112 
    113 		if (isofn == isoend)
    114 			return fc;
    115 		isofn += isochar(isofn, isoend, joliet_level, &ic);
    116 		if (ic == ';') {
    117 			switch (fc) {
    118 			default:
    119 				return fc;
    120 			case 0:
    121 				return 0;
    122 			case ';':
    123 				break;
    124 			}
    125 			fn++;
    126 			for (i = 0; fnlen-- != 0; i = i * 10 + *fn++ - '0') {
    127 				if (*fn < '0' || *fn > '9') {
    128 					return -1;
    129 				}
    130 			}
    131 			for (j = 0; isofn != isoend; j = j * 10 + ic - '0')
    132 				isofn += isochar(isofn, isoend,
    133 						 joliet_level, &ic);
    134 			return i - j;
    135 		}
    136 		if (ic != fc) {
    137 			if (ic >= 'A' && ic <= 'Z') {
    138 				if (ic + ('a' - 'A') != fc) {
    139 					if (fc >= 'a' && fc <= 'z')
    140 						fc -= 'a' - 'A';
    141 
    142 					return (int) fc - (int) ic;
    143 				}
    144 			} else
    145 				return (int) fc - (int) ic;
    146 		}
    147 	}
    148 	if (isofn != isoend) {
    149 		isofn += isochar(isofn, isoend, joliet_level, &ic);
    150 		switch (ic) {
    151 		default:
    152 			return -1;
    153 		case '.':
    154 			if (isofn != isoend) {
    155 				isochar(isofn, isoend, joliet_level, &ic);
    156 				if (ic == ';')
    157 					return 0;
    158 			}
    159 			return -1;
    160 		case ';':
    161 			return 0;
    162 		}
    163 	}
    164 	return 0;
    165 }
    166 
    167 /*
    168  * translate a filename
    169  */
    170 void
    171 isofntrans(infn, infnlen, outfn, outfnlen, original, casetrans, assoc, joliet_level)
    172 	u_char *infn, *outfn;
    173 	int infnlen;
    174 	u_short *outfnlen;
    175 	int original;
    176 	int casetrans;
    177 	int assoc;
    178 	int joliet_level;
    179 {
    180 	int fnidx = 0;
    181 	u_char *infnend = infn + infnlen;
    182 	u_int16_t c;
    183 	int sz;
    184 
    185 	if (assoc) {
    186 		*outfn++ = ASSOCCHAR;
    187 		fnidx++;
    188 	}
    189 
    190 	for(; infn != infnend; fnidx += sz) {
    191 		infn += isochar(infn, infnend, joliet_level, &c);
    192 
    193 		if (casetrans && joliet_level == 0 && c >= 'A' && c <= 'Z')
    194 			c = c + ('a' - 'A');
    195 		else if (!original && c == ';') {
    196 			if (fnidx > 0 && outfn[-1] == '.')
    197 				fnidx--;
    198 			break;
    199 		}
    200 
    201 		sz = wput(outfn, MAXNAMLEN - fnidx, c, joliet_level);
    202 		if (sz == 0) {
    203 			/* not enough space to write the character */
    204 			if (fnidx < MAXNAMLEN) {
    205 				*outfn = '?';
    206 				fnidx++;
    207 			}
    208 			break;
    209 		}
    210 		outfn += sz;
    211 	}
    212 	*outfnlen = fnidx;
    213 }
    214 
    215 static u_int16_t
    216 wget(const u_char **str, size_t *sz, int joliet_level)
    217 {
    218 	if (joliet_level > 0 && cd9660_utf8_joliet) {
    219 		/* decode UTF-8 sequence */
    220 		return wget_utf8((const char **) str, sz);
    221 	} else {
    222 		/*
    223 		 * Raw 8-bit characters without any conversion. For Joliet,
    224 		 * this effectively assumes provided file name is using
    225 		 * ISO-8859-1 subset.
    226 		 */
    227 		u_int16_t c = *str[0];
    228 		(*str)++;
    229 		(*sz)--;
    230 
    231 		return c;
    232 	}
    233 }
    234 
    235 static int
    236 wput(u_char *s, size_t n, u_int16_t c, int joliet_level)
    237 {
    238 	if (joliet_level > 0 && cd9660_utf8_joliet) {
    239 		/* Store Joliet file name encoded into UTF-8 */
    240 		return wput_utf8((char *)s, n, c);
    241 	} else {
    242 		/*
    243 		 * Store raw 8-bit characters without any conversion.
    244 		 * For Joliet case, this filters the Unicode characters
    245 		 * to ISO-8859-1 subset.
    246 		 */
    247 		*s = (u_char)c;
    248 		return 1;
    249 	}
    250 }
    251