Home | History | Annotate | Line # | Download | only in udf
udf_osta.c revision 1.5.14.1
      1 /* $NetBSD: udf_osta.c,v 1.5.14.1 2008/06/23 04:31:50 wrstuden Exp $ */
      2 
      3 #include <sys/cdefs.h>
      4 __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.5.14.1 2008/06/23 04:31:50 wrstuden Exp $");
      5 
      6 /*
      7  * Various routines from the OSTA 2.01 specs.  Copyrights are included with
      8  * each code segment.  Slight whitespace modifications have been made for
      9  * formatting purposes.  Typos/bugs have been fixed.
     10  *
     11  */
     12 
     13 #include "udf_osta.h"
     14 
     15 #ifndef _KERNEL
     16 #include <ctype.h>
     17 #endif
     18 
     19 /*****************************************************************************/
     20 /***********************************************************************
     21  * OSTA compliant Unicode compression, uncompression routines.
     22  * Copyright 1995 Micro Design International, Inc.
     23  * Written by Jason M. Rinn.
     24  * Micro Design International gives permission for the free use of the
     25  * following source code.
     26  */
     27 
     28 /***********************************************************************
     29  * Takes an OSTA CS0 compressed unicode name, and converts
     30  * it to Unicode.
     31  * The Unicode output will be in the byte order
     32  * that the local compiler uses for 16-bit values.
     33  * NOTE: This routine only performs error checking on the compID.
     34  * It is up to the user to ensure that the unicode buffer is large
     35  * enough, and that the compressed unicode name is correct.
     36  *
     37  * RETURN VALUE
     38  *
     39  * The number of unicode characters which were uncompressed.
     40  * A -1 is returned if the compression ID is invalid.
     41  */
     42 int
     43 udf_UncompressUnicode(
     44 	int numberOfBytes,	/* (Input) number of bytes read from media. */
     45 	byte *UDFCompressed,	/* (Input) bytes read from media. */
     46 	unicode_t *unicode)	/* (Output) uncompressed unicode characters. */
     47 {
     48 	unsigned int compID;
     49 	int returnValue, unicodeIndex, byteIndex;
     50 
     51 	/* Use UDFCompressed to store current byte being read. */
     52 	compID = UDFCompressed[0];
     53 
     54 	/* First check for valid compID. */
     55 	if (compID != 8 && compID != 16) {
     56 		returnValue = -1;
     57 	} else {
     58 		unicodeIndex = 0;
     59 		byteIndex = 1;
     60 
     61 		/* Loop through all the bytes. */
     62 		while (byteIndex < numberOfBytes) {
     63 			if (compID == 16) {
     64 				/* Move the first byte to the high bits of the
     65 				 * unicode char.
     66 				 */
     67 				unicode[unicodeIndex] =
     68 				    UDFCompressed[byteIndex++] << 8;
     69 			} else {
     70 				unicode[unicodeIndex] = 0;
     71 			}
     72 			if (byteIndex < numberOfBytes) {
     73 				/*Then the next byte to the low bits. */
     74 				unicode[unicodeIndex] |=
     75 				    UDFCompressed[byteIndex++];
     76 			}
     77 			unicodeIndex++;
     78 		}
     79 		returnValue = unicodeIndex;
     80 	}
     81 	return(returnValue);
     82 }
     83 
     84 /***********************************************************************
     85  * DESCRIPTION:
     86  * Takes a string of unicode wide characters and returns an OSTA CS0
     87  * compressed unicode string. The unicode MUST be in the byte order of
     88  * the compiler in order to obtain correct results. Returns an error
     89  * if the compression ID is invalid.
     90  *
     91  * NOTE: This routine assumes the implementation already knows, by
     92  * the local environment, how many bits are appropriate and
     93  * therefore does no checking to test if the input characters fit
     94  * into that number of bits or not.
     95  *
     96  * RETURN VALUE
     97  *
     98  * The total number of bytes in the compressed OSTA CS0 string,
     99  * including the compression ID.
    100  * A -1 is returned if the compression ID is invalid.
    101  */
    102 int
    103 udf_CompressUnicode(
    104 	int numberOfChars,	/* (Input) number of unicode characters. */
    105 	int compID,		/* (Input) compression ID to be used. */
    106 	unicode_t *unicode,	/* (Input) unicode characters to compress. */
    107 	byte *UDFCompressed)	/* (Output) compressed string, as bytes. */
    108 {
    109 	int byteIndex, unicodeIndex;
    110 
    111 	if (compID != 8 && compID != 16) {
    112 		byteIndex = -1; /* Unsupported compression ID ! */
    113 	} else {
    114 		/* Place compression code in first byte. */
    115 		UDFCompressed[0] = compID;
    116 
    117 		byteIndex = 1;
    118 		unicodeIndex = 0;
    119 		while (unicodeIndex < numberOfChars) {
    120 			if (compID == 16) {
    121 				/* First, place the high bits of the char
    122 				 * into the byte stream.
    123 				 */
    124 				UDFCompressed[byteIndex++] =
    125 				    (unicode[unicodeIndex] & 0xFF00) >> 8;
    126 			}
    127 			/*Then place the low bits into the stream. */
    128 			UDFCompressed[byteIndex++] =
    129 			    unicode[unicodeIndex] & 0x00FF;
    130 			unicodeIndex++;
    131 		}
    132 	}
    133 	return(byteIndex);
    134 }
    135 
    136 /*****************************************************************************/
/*
 * CRC 010041
 *
 * Byte-wise lookup table for a 16-bit CRC with generator polynomial
 * 010041 (octal) == 0x1021, processed most significant bit first.
 * The table is never modified, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};

/*
 * Computes the 16-bit CRC of the n bytes starting at s, using an
 * initial value of 0.  A non-positive n yields 0.
 */
unsigned short
udf_cksum(unsigned char *s, int n)
{
	unsigned short crc = 0;

	while (n-- > 0) {
		/*
		 * Index by the current top CRC byte xor the next input byte;
		 * the assignment truncates the shifted value to 16 bits.
		 */
		crc = crc_table[((crc >> 8) ^ *s++) & 0xff] ^ (crc << 8);
	}
	return crc;
}

/* UNICODE Checksum */
/*
 * Computes the same CRC over n 16-bit unicode characters starting at s.
 * Each character contributes two bytes, high byte first, so the result
 * matches udf_cksum() over the big endian byte stream of the string.
 */
unsigned short
udf_unicode_cksum(unsigned short *s, int n)
{
	unsigned short crc = 0;

	while (n-- > 0) {
		/* Take high order byte first--corresponds to a big endian
		 * byte stream.
		 */
		crc = crc_table[((crc >> 8) ^ (*s >> 8)) & 0xff] ^ (crc << 8);
		crc = crc_table[((crc >> 8) ^ (*s++ & 0xff)) & 0xff] ^
		    (crc << 8);
	}
	return crc;
}
    204 
    205 
/*
 * Checksum for the header of an Implementation Use Extended Attribute
 * or an Application Use Extended Attribute.
 *
 * The checksum is the plain sum, truncated to 16 bits, of the first
 * 48 bytes at data: the fields AttributeType through
 * ImplementationIdentifier (or ApplicationIdentifier) inclusive.
 * The caller must make sure at least 48 bytes are readable.
 */
uint16_t udf_ea_cksum(uint8_t *data) {
	enum { EA_CKSUM_SPAN = 48 };	/* bytes covered by the checksum */
	uint16_t sum;
	int pos;

	sum = 0;
	for (pos = 0; pos < EA_CKSUM_SPAN; pos++)
		sum = (uint16_t)(sum + data[pos]);

	return sum;
}
    224 
    225 
#ifdef MAIN
/*
 * Stand-alone self test for udf_cksum(); only built when MAIN is defined.
 * It prints the CRC of the three reference bytes next to the expected
 * value 0x3299.
 *
 * <stdio.h> is pulled in here, inside the MAIN guard, so that normal
 * builds of this file are unaffected.
 */
#include <stdio.h>

unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, (int)sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n",
	    (unsigned int)x, 0x3299u);
	return 0;
}
#endif
    237 
    238 /*****************************************************************************/
    239 /* #ifdef NEEDS_ISPRINT */
    240 /***********************************************************************
    241  * OSTA UDF compliant file name translation routine for OS/2,
    242  * Windows 95, Windows NT, Macintosh and UNIX.
    243  * Copyright 1995 Micro Design International, Inc.
    244  * Written by Jason M. Rinn.
    245  * Micro Design International gives permission for the free use of the
    246  * following source code.
    247  */
    248 
    249 /***********************************************************************
    250  * To use these routines with different operating systems.
    251  *
    252  * OS/2
    253  * Define OS2
    254  * Define MAXLEN = 254
    255  *
    256  * Windows 95
    257  * Define WIN_95
    258  * Define MAXLEN = 255
    259  *
    260  * Windows NT
    261  * Define WIN_NT
    262  * Define MAXLEN = 255
    263  *
    264  * Macintosh:
    265  * Define MAC.
    266  * Define MAXLEN = 31.
    267  *
    268  * UNIX
    269  * Define UNIX.
    270  * Define MAXLEN as specified by unix version.
    271  */
    272 
/* Unicode '_': written in place of a run of illegal/unprintable chars. */
#define	ILLEGAL_CHAR_MARK	0x005F
/* Unicode '#': written immediately before the four hex CRC digits. */
#define	CRC_MARK	0x0023
/* Longest extension, in characters after the period, that is recognised. */
#define	EXT_SIZE	5
/* Unicode '.' */
#define	PERIOD	0x002E
/* Unicode ' ' */
#define	SPACE	0x0020

/*** PROTOTYPES ***/
/* Forward declaration; the definition appears later in this file. */
int IsIllegal(unicode_t ch);
    281 
    282 /* Define a function or macro which determines if a Unicode character is
    283  * printable under your implementation.
    284  */
    285 
    286 
    287 /* #include <stdio.h> */
    288 static int UnicodeIsPrint(unicode_t ch) {
    289 	return (ch >=' ') && (ch != 127);
    290 }
    291 
    292 
    293 int UnicodeLength(unicode_t *string) {
    294 	int length;
    295 	length = 0;
    296 	while (*string++) length++;
    297 
    298 	return length;
    299 }
    300 
    301 
    302 #ifdef _KERNEL
    303 static int isprint(int c) {
    304 	return (c >= ' ') && (c != 127);
    305 }
    306 #endif
    307 
    308 
    309 /***********************************************************************
    310  * Translates a long file name to one using a MAXLEN and an illegal
    311  * char set in accord with the OSTA requirements. Assumes the name has
    312  * already been translated to Unicode.
    313  *
    314  * RETURN VALUE
    315  *
    316  * Number of unicode characters in translated name.
    317  */
    318 int UDFTransName(
    319 	unicode_t *newName,	/* (Output)Translated name. Must be of length
    320 				 * MAXLEN */
    321 	unicode_t *udfName,	/* (Input) Name from UDF volume.*/
    322 	int udfLen)		/* (Input) Length of UDF Name. */
    323 {
    324 	int Index, newIndex = 0, needsCRC = false;	/* index is shadowed */
    325 	int extIndex = 0, newExtIndex = 0, hasExt = false;
    326 #if defined OS2 || defined WIN_95 || defined WIN_NT
    327 	int trailIndex = 0;
    328 #endif
    329 	unsigned short valueCRC;
    330 	unicode_t current;
    331 	const char hexChar[] = "0123456789ABCDEF";
    332 
    333 	for (Index = 0; Index < udfLen; Index++) {
    334 		current = udfName[Index];
    335 
    336 		if (IsIllegal(current) || !UnicodeIsPrint(current)) {
    337 			needsCRC = true;
    338 			/* Replace Illegal and non-displayable chars with
    339 			 * underscore.
    340 			 */
    341 			current = ILLEGAL_CHAR_MARK;
    342 			/* Skip any other illegal or non-displayable
    343 			 * characters.
    344 			 */
    345 			while(Index+1 < udfLen && (IsIllegal(udfName[Index+1])
    346 			    || !UnicodeIsPrint(udfName[Index+1]))) {
    347 				Index++;
    348 			}
    349 		}
    350 
    351 		/* Record position of extension, if one is found. */
    352 		if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) {
    353 			if (udfLen == Index + 1) {
    354 				/* A trailing period is NOT an extension. */
    355 				hasExt = false;
    356 			} else {
    357 				hasExt = true;
    358 				extIndex = Index;
    359 				newExtIndex = newIndex;
    360 			}
    361 		}
    362 
    363 #if defined OS2 || defined WIN_95 || defined WIN_NT
    364 		/* Record position of last char which is NOT period or space. */
    365 		else if (current != PERIOD && current != SPACE) {
    366 			trailIndex = newIndex;
    367 		}
    368 #endif
    369 
    370 		if (newIndex < MAXLEN) {
    371 			newName[newIndex++] = current;
    372 		} else {
    373 			needsCRC = true;
    374 		}
    375 	}
    376 
    377 #if defined OS2 || defined WIN_95 || defined WIN_NT
    378 	/* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
    379 	if (trailIndex != newIndex - 1) {
    380 		newIndex = trailIndex + 1;
    381 		needsCRC = true;
    382 		hasExt = false; /* Trailing period does not make an
    383 				 * extension. */
    384 	}
    385 #endif
    386 
    387 	if (needsCRC) {
    388 		unicode_t ext[EXT_SIZE];
    389 		int localExtIndex = 0;
    390 		if (hasExt) {
    391 			int maxFilenameLen;
    392 			/* Translate extension, and store it in ext. */
    393 			for(Index = 0; Index<EXT_SIZE &&
    394 			    extIndex + Index +1 < udfLen; Index++ ) {
    395 				current = udfName[extIndex + Index + 1];
    396 				if (IsIllegal(current) ||
    397 				    !UnicodeIsPrint(current)) {
    398 					needsCRC = 1;
    399 					/* Replace Illegal and non-displayable
    400 					 * chars with underscore.
    401 					 */
    402 					current = ILLEGAL_CHAR_MARK;
    403 					/* Skip any other illegal or
    404 					 * non-displayable characters.
    405 					 */
    406 					while(Index + 1 < EXT_SIZE
    407 					    && (IsIllegal(udfName[extIndex +
    408 					    Index + 2]) ||
    409 					    !isprint(udfName[extIndex +
    410 					    Index + 2]))) {
    411 						Index++;
    412 					}
    413 				}
    414 				ext[localExtIndex++] = current;
    415 			}
    416 
    417 			/* Truncate filename to leave room for extension and
    418 			 * CRC.
    419 			 */
    420 			maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
    421 			if (newIndex > maxFilenameLen) {
    422 				newIndex = maxFilenameLen;
    423 			} else {
    424 				newIndex = newExtIndex;
    425 			}
    426 		} else if (newIndex > MAXLEN - 5) {
    427 			/*If no extension, make sure to leave room for CRC. */
    428 			newIndex = MAXLEN - 5;
    429 		}
    430 		newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
    431 
    432 		/*Calculate CRC from original filename from FileIdentifier. */
    433 		valueCRC = udf_unicode_cksum(udfName, udfLen);
    434 		/* Convert 16-bits of CRC to hex characters. */
    435 		newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
    436 		newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
    437 		newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
    438 		newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
    439 
    440 		/* Place a translated extension at end, if found. */
    441 		if (hasExt) {
    442 			newName[newIndex++] = PERIOD;
    443 			for (Index = 0;Index < localExtIndex ;Index++ ) {
    444 				newName[newIndex++] = ext[Index];
    445 			}
    446 		}
    447 	}
    448 	return(newIndex);
    449 }
    450 
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Tests whether a Unicode character equals any character of a zero
 * terminated ASCII string.
 * Used by the OS2 version of IsIllegal for readability, since all of the
 * illegal characters above 0x0020 are in the ASCII subset of Unicode.
 * Comparable to the standard C function strchr(), except that only a
 * yes/no answer is produced.
 *
 * RETURN VALUE
 *
 * Non-zero if the Unicode character is in the given ASCII string.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	/* Both operands are unsigned, so the comparison is value based. */
	for (; *string != '\0'; string++) {
		if (*string == ch)
			return(true);
	}
	return(false);
}
#endif /* OS2 */
    479 
    480 /***********************************************************************
    481  * Decides whether the given character is illegal for a given OS.
    482  *
    483  * RETURN VALUE
    484  *
    485  * Non-zero if char is illegal.
    486  */
    487 int IsIllegal(unicode_t ch)
    488 {
    489 #ifdef MAC
    490 	/* Only illegal character on the MAC is the colon. */
    491 	if (ch == 0x003A) {
    492 		return(1);
    493 	} else {
    494 		return(0);
    495 	}
    496 
    497 #elif defined UNIX
    498 	/* Illegal UNIX characters are NULL and slash. */
    499 	if (ch == 0x0000 || ch == 0x002F) {
    500 		return(1);
    501 	} else {
    502 		return(0);
    503 	}
    504 
    505 #elif defined OS2 || defined WIN_95 || defined WIN_NT
    506 	/* Illegal char's for OS/2 according to WARP toolkit. */
    507 	if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
    508 		return(1);
    509 	} else {
    510 		return(0);
    511 	}
    512 #endif
    513 }
    514 /* #endif*/	/* NEEDS_ISPRINT */
    515 
    516