/* $NetBSD: udf_osta.c,v 1.6 2008/05/14 16:49:48 reinoud Exp $ */
2
3 #include <sys/cdefs.h>
4 #ifndef lint
5 __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.6 2008/05/14 16:49:48 reinoud Exp $");
6 #endif /* not lint */
7
8 /*
9 * Various routines from the OSTA 2.01 specs. Copyrights are included with
10 * each code segment. Slight whitespace modifications have been made for
11 * formatting purposes. Typos/bugs have been fixed.
12 *
13 */
14
15 #include "udf_osta.h"
16
17 #ifndef _KERNEL
18 #include <ctype.h>
19 #endif
20
21 /*****************************************************************************/
22 /***********************************************************************
23 * OSTA compliant Unicode compression, uncompression routines.
24 * Copyright 1995 Micro Design International, Inc.
25 * Written by Jason M. Rinn.
26 * Micro Design International gives permission for the free use of the
27 * following source code.
28 */
29
30 /***********************************************************************
31 * Takes an OSTA CS0 compressed unicode name, and converts
32 * it to Unicode.
33 * The Unicode output will be in the byte order
34 * that the local compiler uses for 16-bit values.
35 * NOTE: This routine only performs error checking on the compID.
36 * It is up to the user to ensure that the unicode buffer is large
37 * enough, and that the compressed unicode name is correct.
38 *
39 * RETURN VALUE
40 *
41 * The number of unicode characters which were uncompressed.
42 * A -1 is returned if the compression ID is invalid.
43 */
44 int
45 udf_UncompressUnicode(
46 int numberOfBytes, /* (Input) number of bytes read from media. */
47 byte *UDFCompressed, /* (Input) bytes read from media. */
48 unicode_t *unicode) /* (Output) uncompressed unicode characters. */
49 {
50 unsigned int compID;
51 int returnValue, unicodeIndex, byteIndex;
52
53 /* Use UDFCompressed to store current byte being read. */
54 compID = UDFCompressed[0];
55
56 /* First check for valid compID. */
57 if (compID != 8 && compID != 16) {
58 returnValue = -1;
59 } else {
60 unicodeIndex = 0;
61 byteIndex = 1;
62
63 /* Loop through all the bytes. */
64 while (byteIndex < numberOfBytes) {
65 if (compID == 16) {
66 /* Move the first byte to the high bits of the
67 * unicode char.
68 */
69 unicode[unicodeIndex] =
70 UDFCompressed[byteIndex++] << 8;
71 } else {
72 unicode[unicodeIndex] = 0;
73 }
74 if (byteIndex < numberOfBytes) {
75 /*Then the next byte to the low bits. */
76 unicode[unicodeIndex] |=
77 UDFCompressed[byteIndex++];
78 }
79 unicodeIndex++;
80 }
81 returnValue = unicodeIndex;
82 }
83 return(returnValue);
84 }
85
86 /***********************************************************************
87 * DESCRIPTION:
88 * Takes a string of unicode wide characters and returns an OSTA CS0
89 * compressed unicode string. The unicode MUST be in the byte order of
90 * the compiler in order to obtain correct results. Returns an error
91 * if the compression ID is invalid.
92 *
93 * NOTE: This routine assumes the implementation already knows, by
94 * the local environment, how many bits are appropriate and
95 * therefore does no checking to test if the input characters fit
96 * into that number of bits or not.
97 *
98 * RETURN VALUE
99 *
100 * The total number of bytes in the compressed OSTA CS0 string,
101 * including the compression ID.
102 * A -1 is returned if the compression ID is invalid.
103 */
104 int
105 udf_CompressUnicode(
106 int numberOfChars, /* (Input) number of unicode characters. */
107 int compID, /* (Input) compression ID to be used. */
108 unicode_t *unicode, /* (Input) unicode characters to compress. */
109 byte *UDFCompressed) /* (Output) compressed string, as bytes. */
110 {
111 int byteIndex, unicodeIndex;
112
113 if (compID != 8 && compID != 16) {
114 byteIndex = -1; /* Unsupported compression ID ! */
115 } else {
116 /* Place compression code in first byte. */
117 UDFCompressed[0] = compID;
118
119 byteIndex = 1;
120 unicodeIndex = 0;
121 while (unicodeIndex < numberOfChars) {
122 if (compID == 16) {
123 /* First, place the high bits of the char
124 * into the byte stream.
125 */
126 UDFCompressed[byteIndex++] =
127 (unicode[unicodeIndex] & 0xFF00) >> 8;
128 }
129 /*Then place the low bits into the stream. */
130 UDFCompressed[byteIndex++] =
131 unicode[unicodeIndex] & 0x00FF;
132 unicodeIndex++;
133 }
134 }
135 return(byteIndex);
136 }
137
138 /*****************************************************************************/
/*
 * CRC-16 lookup table for the generator polynomial 010041 (octal), which
 * is 0x1021 in hex.  The table is indexed with the high byte of the
 * running CRC XORed with the next data byte.  It is never modified.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};

/*
 * udf_cksum --
 *	Table driven 16-bit CRC over the n bytes starting at s.
 *
 * The CRC starts at zero and is not inverted at the end.  A count of
 * zero or less yields 0 without touching s.
 */
unsigned short
udf_cksum(unsigned char *s, int n)
{
	unsigned short crc = 0;

	while (n-- > 0) {
		/* Fold the next byte into the high byte of the CRC. */
		crc = crc_table[((crc >> 8) ^ *s++) & 0xff] ^ (crc << 8);
	}
	return crc;
}

/*
 * udf_unicode_cksum --
 *	Same CRC as udf_cksum(), computed over n 16-bit characters.
 *
 * Each character contributes its high-order byte first and then its
 * low-order byte, which corresponds to a big endian byte stream.  The
 * result therefore does not depend on the host byte order.
 */
unsigned short
udf_unicode_cksum(unsigned short *s, int n)
{
	unsigned short crc = 0;

	while (n-- > 0) {
		/* High-order byte first ... */
		crc = crc_table[((crc >> 8) ^ (*s >> 8)) & 0xff] ^ (crc << 8);
		/* ... then the low-order byte, and move to the next char. */
		crc = crc_table[((crc >> 8) ^ (*s++ & 0xff)) & 0xff] ^
		    (crc << 8);
	}
	return crc;
}
206
207
/*
 * udf_ea_cksum --
 *	16-bit checksum of an Implementation Use Extended Attribute header
 *	or Application Use Extended Attribute header.
 *
 * The checksum is the plain sum, modulo 2^16, of the first 48 bytes at
 * data: the fields AttributeType through ImplementationIdentifier (or
 * ApplicationIdentifier) inclusively.  Exactly 48 bytes are read, so the
 * caller must supply at least that many.
 */
uint16_t udf_ea_cksum(uint8_t *data) {
	const uint8_t *end = data + 48;	/* one past the covered fields */
	uint16_t sum = 0;

	while (data < end)
		sum += *data++;

	return sum;
}
226
227
#ifdef MAIN
/*
 * Stand-alone self test, only built when MAIN is defined.  It prints the
 * CRC that udf_cksum() computes for a three byte sample next to the value
 * the sample is expected to produce (0x3299).
 */
#include <stdio.h>

unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, (int)sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n",
	    (unsigned int)x, 0x3299u);
	return 0;
}
#endif
239
240 /*****************************************************************************/
241 /* #ifdef NEEDS_ISPRINT */
242 /***********************************************************************
243 * OSTA UDF compliant file name translation routine for OS/2,
244 * Windows 95, Windows NT, Macintosh and UNIX.
245 * Copyright 1995 Micro Design International, Inc.
246 * Written by Jason M. Rinn.
247 * Micro Design International gives permission for the free use of the
248 * following source code.
249 */
250
251 /***********************************************************************
252 * To use these routines with different operating systems.
253 *
254 * OS/2
255 * Define OS2
256 * Define MAXLEN = 254
257 *
258 * Windows 95
259 * Define WIN_95
260 * Define MAXLEN = 255
261 *
262 * Windows NT
263 * Define WIN_NT
264 * Define MAXLEN = 255
265 *
266 * Macintosh:
267 * Define MAC.
268 * Define MAXLEN = 31.
269 *
270 * UNIX
271 * Define UNIX.
272 * Define MAXLEN as specified by unix version.
273 */
274
/* Unicode values used while building a translated file name. */
#define	ILLEGAL_CHAR_MARK	0x005F	/* '_', replaces illegal characters  */
#define	CRC_MARK		0x0023	/* '#', written in front of the CRC  */
#define	EXT_SIZE		5	/* max. extension length that is kept */
#define	PERIOD			0x002E	/* '.'				     */
#define	SPACE			0x0020	/* ' '				     */
280
281 /*** PROTOTYPES ***/
282 int IsIllegal(unicode_t ch);
283
284 /* Define a function or macro which determines if a Unicode character is
285 * printable under your implementation.
286 */
287
288
289 /* #include <stdio.h> */
290 static int UnicodeIsPrint(unicode_t ch) {
291 return (ch >=' ') && (ch < 127);
292 }
293
294
295 int UnicodeLength(unicode_t *string) {
296 int length;
297 length = 0;
298 while (*string++) length++;
299
300 return length;
301 }
302
303
304 #ifdef _KERNEL
305 static int isprint(int c) {
306 return (c >= ' ') && (c != 127);
307 }
308 #endif
309
310
311 /***********************************************************************
312 * Translates a long file name to one using a MAXLEN and an illegal
313 * char set in accord with the OSTA requirements. Assumes the name has
314 * already been translated to Unicode.
315 *
316 * RETURN VALUE
317 *
318 * Number of unicode characters in translated name.
319 */
320 int UDFTransName(
321 unicode_t *newName, /* (Output)Translated name. Must be of length
322 * MAXLEN */
323 unicode_t *udfName, /* (Input) Name from UDF volume.*/
324 int udfLen) /* (Input) Length of UDF Name. */
325 {
326 int Index, newIndex = 0, needsCRC = false; /* index is shadowed */
327 int extIndex = 0, newExtIndex = 0, hasExt = false;
328 #if defined OS2 || defined WIN_95 || defined WIN_NT
329 int trailIndex = 0;
330 #endif
331 unsigned short valueCRC;
332 unicode_t current;
333 const char hexChar[] = "0123456789ABCDEF";
334
335 for (Index = 0; Index < udfLen; Index++) {
336 current = udfName[Index];
337
338 if (IsIllegal(current) || !UnicodeIsPrint(current)) {
339 needsCRC = true;
340 /* Replace Illegal and non-displayable chars with
341 * underscore.
342 */
343 current = ILLEGAL_CHAR_MARK;
344 /* Skip any other illegal or non-displayable
345 * characters.
346 */
347 while(Index+1 < udfLen && (IsIllegal(udfName[Index+1])
348 || !UnicodeIsPrint(udfName[Index+1]))) {
349 Index++;
350 }
351 }
352
353 /* Record position of extension, if one is found. */
354 if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) {
355 if (udfLen == Index + 1) {
356 /* A trailing period is NOT an extension. */
357 hasExt = false;
358 } else {
359 hasExt = true;
360 extIndex = Index;
361 newExtIndex = newIndex;
362 }
363 }
364
365 #if defined OS2 || defined WIN_95 || defined WIN_NT
366 /* Record position of last char which is NOT period or space. */
367 else if (current != PERIOD && current != SPACE) {
368 trailIndex = newIndex;
369 }
370 #endif
371
372 if (newIndex < MAXLEN) {
373 newName[newIndex++] = current;
374 } else {
375 needsCRC = true;
376 }
377 }
378
379 #if defined OS2 || defined WIN_95 || defined WIN_NT
380 /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
381 if (trailIndex != newIndex - 1) {
382 newIndex = trailIndex + 1;
383 needsCRC = true;
384 hasExt = false; /* Trailing period does not make an
385 * extension. */
386 }
387 #endif
388
389 if (needsCRC) {
390 unicode_t ext[EXT_SIZE];
391 int localExtIndex = 0;
392 if (hasExt) {
393 int maxFilenameLen;
394 /* Translate extension, and store it in ext. */
395 for(Index = 0; Index<EXT_SIZE &&
396 extIndex + Index +1 < udfLen; Index++ ) {
397 current = udfName[extIndex + Index + 1];
398 if (IsIllegal(current) ||
399 !UnicodeIsPrint(current)) {
400 needsCRC = 1;
401 /* Replace Illegal and non-displayable
402 * chars with underscore.
403 */
404 current = ILLEGAL_CHAR_MARK;
405 /* Skip any other illegal or
406 * non-displayable characters.
407 */
408 while(Index + 1 < EXT_SIZE
409 && (IsIllegal(udfName[extIndex +
410 Index + 2]) ||
411 !isprint(udfName[extIndex +
412 Index + 2]))) {
413 Index++;
414 }
415 }
416 ext[localExtIndex++] = current;
417 }
418
419 /* Truncate filename to leave room for extension and
420 * CRC.
421 */
422 maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
423 if (newIndex > maxFilenameLen) {
424 newIndex = maxFilenameLen;
425 } else {
426 newIndex = newExtIndex;
427 }
428 } else if (newIndex > MAXLEN - 5) {
429 /*If no extension, make sure to leave room for CRC. */
430 newIndex = MAXLEN - 5;
431 }
432 newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
433
434 /*Calculate CRC from original filename from FileIdentifier. */
435 valueCRC = udf_unicode_cksum(udfName, udfLen);
436 /* Convert 16-bits of CRC to hex characters. */
437 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
438 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
439 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
440 newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
441
442 /* Place a translated extension at end, if found. */
443 if (hasExt) {
444 newName[newIndex++] = PERIOD;
445 for (Index = 0;Index < localExtIndex ;Index++ ) {
446 newName[newIndex++] = ext[Index];
447 }
448 }
449 }
450 return(newIndex);
451 }
452
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Looks a Unicode character up in a zero terminated list of ASCII
 * characters, much like the standard C function strchr() does.
 * Used by the OS2 version of IsIllegal for readability, since all of the
 * illegal characters above 0x0020 are in the ASCII subset of Unicode.
 *
 * RETURN VALUE
 *
 *	Non-zero if the Unicode character is in the given ASCII string.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through.	*/
	unicode_t ch)		/* (Input) Unicode char to search for.	*/
{
	for (; *string != '\0'; string++) {
		/* Both operands are unsigned, so they compare directly. */
		if (*string == ch)
			return(true);
	}
	return(false);
}
#endif /* OS2 || WIN_95 || WIN_NT */
481
482 /***********************************************************************
483 * Decides whether the given character is illegal for a given OS.
484 *
485 * RETURN VALUE
486 *
487 * Non-zero if char is illegal.
488 */
489 int IsIllegal(unicode_t ch)
490 {
491 #ifdef MAC
492 /* Only illegal character on the MAC is the colon. */
493 if (ch == 0x003A) {
494 return(1);
495 } else {
496 return(0);
497 }
498
499 #elif defined UNIX
500 /* Illegal UNIX characters are NULL and slash. */
501 if (ch == 0x0000 || ch == 0x002F) {
502 return(1);
503 } else {
504 return(0);
505 }
506
507 #elif defined OS2 || defined WIN_95 || defined WIN_NT
508 /* Illegal char's for OS/2 according to WARP toolkit. */
509 if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
510 return(1);
511 } else {
512 return(0);
513 }
514 #endif
515 }
516 /* #endif*/ /* NEEDS_ISPRINT */
517
518