/* $NetBSD: udf_osta.c,v 1.4.30.1 2007/12/13 21:56:51 bouyer Exp $ */
2
3 #include <sys/cdefs.h>
4 __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.4.30.1 2007/12/13 21:56:51 bouyer Exp $");
5
6 /*
7 * Various routines from the OSTA 2.01 specs. Copyrights are included with
8 * each code segment. Slight whitespace modifications have been made for
9 * formatting purposes. Typos/bugs have been fixed.
10 *
11 */
12
13 #include "udf_osta.h"
14
15
16 /*****************************************************************************/
17 /***********************************************************************
18 * OSTA compliant Unicode compression, uncompression routines.
19 * Copyright 1995 Micro Design International, Inc.
20 * Written by Jason M. Rinn.
21 * Micro Design International gives permission for the free use of the
22 * following source code.
23 */
24
25 /***********************************************************************
26 * Takes an OSTA CS0 compressed unicode name, and converts
27 * it to Unicode.
28 * The Unicode output will be in the byte order
29 * that the local compiler uses for 16-bit values.
30 * NOTE: This routine only performs error checking on the compID.
31 * It is up to the user to ensure that the unicode buffer is large
32 * enough, and that the compressed unicode name is correct.
33 *
34 * RETURN VALUE
35 *
36 * The number of unicode characters which were uncompressed.
37 * A -1 is returned if the compression ID is invalid.
38 */
39 int
40 udf_UncompressUnicode(
41 int numberOfBytes, /* (Input) number of bytes read from media. */
42 byte *UDFCompressed, /* (Input) bytes read from media. */
43 unicode_t *unicode) /* (Output) uncompressed unicode characters. */
44 {
45 unsigned int compID;
46 int returnValue, unicodeIndex, byteIndex;
47
48 /* Use UDFCompressed to store current byte being read. */
49 compID = UDFCompressed[0];
50
51 /* First check for valid compID. */
52 if (compID != 8 && compID != 16) {
53 returnValue = -1;
54 } else {
55 unicodeIndex = 0;
56 byteIndex = 1;
57
58 /* Loop through all the bytes. */
59 while (byteIndex < numberOfBytes) {
60 if (compID == 16) {
61 /* Move the first byte to the high bits of the
62 * unicode char.
63 */
64 unicode[unicodeIndex] =
65 UDFCompressed[byteIndex++] << 8;
66 } else {
67 unicode[unicodeIndex] = 0;
68 }
69 if (byteIndex < numberOfBytes) {
70 /*Then the next byte to the low bits. */
71 unicode[unicodeIndex] |=
72 UDFCompressed[byteIndex++];
73 }
74 unicodeIndex++;
75 }
76 returnValue = unicodeIndex;
77 }
78 return(returnValue);
79 }
80
81 /***********************************************************************
82 * DESCRIPTION:
83 * Takes a string of unicode wide characters and returns an OSTA CS0
84 * compressed unicode string. The unicode MUST be in the byte order of
85 * the compiler in order to obtain correct results. Returns an error
86 * if the compression ID is invalid.
87 *
88 * NOTE: This routine assumes the implementation already knows, by
89 * the local environment, how many bits are appropriate and
90 * therefore does no checking to test if the input characters fit
91 * into that number of bits or not.
92 *
93 * RETURN VALUE
94 *
95 * The total number of bytes in the compressed OSTA CS0 string,
96 * including the compression ID.
97 * A -1 is returned if the compression ID is invalid.
98 */
99 int
100 udf_CompressUnicode(
101 int numberOfChars, /* (Input) number of unicode characters. */
102 int compID, /* (Input) compression ID to be used. */
103 unicode_t *unicode, /* (Input) unicode characters to compress. */
104 byte *UDFCompressed) /* (Output) compressed string, as bytes. */
105 {
106 int byteIndex, unicodeIndex;
107
108 if (compID != 8 && compID != 16) {
109 byteIndex = -1; /* Unsupported compression ID ! */
110 } else {
111 /* Place compression code in first byte. */
112 UDFCompressed[0] = compID;
113
114 byteIndex = 1;
115 unicodeIndex = 0;
116 while (unicodeIndex < numberOfChars) {
117 if (compID == 16) {
118 /* First, place the high bits of the char
119 * into the byte stream.
120 */
121 UDFCompressed[byteIndex++] =
122 (unicode[unicodeIndex] & 0xFF00) >> 8;
123 }
124 /*Then place the low bits into the stream. */
125 UDFCompressed[byteIndex++] =
126 unicode[unicodeIndex] & 0x00FF;
127 unicodeIndex++;
128 }
129 }
130 return(byteIndex);
131 }
132
/*****************************************************************************/
/*
 * CRC 010041
 *
 * Byte-wise lookup table for a 16-bit CRC.  010041 (octal) is 0x1021,
 * which is also the value of entry 1.  Entry i is used by the checksum
 * routines as the CRC contribution of the byte value i.
 * The table is only ever read, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
171
172 unsigned short
173 udf_cksum(s, n)
174 unsigned char *s;
175 int n;
176 {
177 unsigned short crc=0;
178
179 while (n-- > 0)
180 crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
181 return crc;
182 }
183
184 /* UNICODE Checksum */
185 unsigned short
186 udf_unicode_cksum(s, n)
187 unsigned short *s;
188 int n;
189 {
190 unsigned short crc=0;
191
192 while (n-- > 0) {
193 /* Take high order byte first--corresponds to a big endian
194 * byte stream.
195 */
196 crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
197 crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
198 }
199 return crc;
200 }
201
#ifdef MAIN
/*
 * Stand-alone self test, only built when MAIN is defined.
 * Checksums the three bytes below with udf_cksum() and prints the
 * result next to the expected value 0x3299.
 */
#include <stdio.h>

unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, (int)sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n",
	    (unsigned int)x, 0x3299u);
	return 0;
}
#endif
213
214 /*****************************************************************************/
215 /* #ifdef NEEDS_ISPRINT */
216 /***********************************************************************
217 * OSTA UDF compliant file name translation routine for OS/2,
218 * Windows 95, Windows NT, Macintosh and UNIX.
219 * Copyright 1995 Micro Design International, Inc.
220 * Written by Jason M. Rinn.
221 * Micro Design International gives permission for the free use of the
222 * following source code.
223 */
224
225 /***********************************************************************
226 * To use these routines with different operating systems.
227 *
228 * OS/2
229 * Define OS2
230 * Define MAXLEN = 254
231 *
232 * Windows 95
233 * Define WIN_95
234 * Define MAXLEN = 255
235 *
236 * Windows NT
237 * Define WIN_NT
238 * Define MAXLEN = 255
239 *
240 * Macintosh:
241 * Define MAC.
242 * Define MAXLEN = 31.
243 *
244 * UNIX
245 * Define UNIX.
246 * Define MAXLEN as specified by unix version.
247 */
248
/* Unicode code points with a special role in file name translation. */
#define SPACE			0x0020	/* ' ' */
#define CRC_MARK		0x0023	/* '#', written in front of the CRC */
#define PERIOD			0x002E	/* '.', introduces the extension */
#define ILLEGAL_CHAR_MARK	0x005F	/* '_', replaces rejected chars */

/* Largest number of characters after a period still taken as extension. */
#define EXT_SIZE		5
254
255 /*** PROTOTYPES ***/
256 int IsIllegal(unicode_t ch);
257
258 /* Define a function or macro which determines if a Unicode character is
259 * printable under your implementation.
260 */
261
262
263 /* #include <stdio.h> */
264 static int UnicodeIsPrint(unicode_t ch) {
265 return (ch >=' ') && (ch < 127);
266 }
267
268
269 int UnicodeLength(unicode_t *string) {
270 int length;
271 length = 0;
272 while (*string++) length++;
273
274 return length;
275 }
276
277
/*
 * File-local printability test (this is not the <ctype.h> function).
 *
 * The parameter is an int rather than an unsigned char so that a
 * 16-bit unicode value passed by a caller is judged as a whole instead
 * of being truncated to its low byte first (0x0141 must not be taken
 * for 'A').  The accepted range, space (0x20) up to but not including
 * 127, is the same one UnicodeIsPrint() uses.
 *
 * Returns 1 if c is printable, 0 otherwise.
 */
static int
isprint(int c)
{
	return (c >= ' ') && (c < 127);
}
281
282
283 /***********************************************************************
284 * Translates a long file name to one using a MAXLEN and an illegal
285 * char set in accord with the OSTA requirements. Assumes the name has
286 * already been translated to Unicode.
287 *
288 * RETURN VALUE
289 *
290 * Number of unicode characters in translated name.
291 */
292 int UDFTransName(
293 unicode_t *newName, /* (Output)Translated name. Must be of length
294 * MAXLEN */
295 unicode_t *udfName, /* (Input) Name from UDF volume.*/
296 int udfLen) /* (Input) Length of UDF Name. */
297 {
298 int Index, newIndex = 0, needsCRC = false; /* index is shadowed */
299 int extIndex = 0, newExtIndex = 0, hasExt = false;
300 #if defined OS2 || defined WIN_95 || defined WIN_NT
301 int trailIndex = 0;
302 #endif
303 unsigned short valueCRC;
304 unicode_t current;
305 const char hexChar[] = "0123456789ABCDEF";
306
307 for (Index = 0; Index < udfLen; Index++) {
308 current = udfName[Index];
309
310 if (IsIllegal(current) || !UnicodeIsPrint(current)) {
311 needsCRC = true;
312 /* Replace Illegal and non-displayable chars with
313 * underscore.
314 */
315 current = ILLEGAL_CHAR_MARK;
316 /* Skip any other illegal or non-displayable
317 * characters.
318 */
319 while(Index+1 < udfLen && (IsIllegal(udfName[Index+1])
320 || !UnicodeIsPrint(udfName[Index+1]))) {
321 Index++;
322 }
323 }
324
325 /* Record position of extension, if one is found. */
326 if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) {
327 if (udfLen == Index + 1) {
328 /* A trailing period is NOT an extension. */
329 hasExt = false;
330 } else {
331 hasExt = true;
332 extIndex = Index;
333 newExtIndex = newIndex;
334 }
335 }
336
337 #if defined OS2 || defined WIN_95 || defined WIN_NT
338 /* Record position of last char which is NOT period or space. */
339 else if (current != PERIOD && current != SPACE) {
340 trailIndex = newIndex;
341 }
342 #endif
343
344 if (newIndex < MAXLEN) {
345 newName[newIndex++] = current;
346 } else {
347 needsCRC = true;
348 }
349 }
350
351 #if defined OS2 || defined WIN_95 || defined WIN_NT
352 /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
353 if (trailIndex != newIndex - 1) {
354 newIndex = trailIndex + 1;
355 needsCRC = true;
356 hasExt = false; /* Trailing period does not make an
357 * extension. */
358 }
359 #endif
360
361 if (needsCRC) {
362 unicode_t ext[EXT_SIZE];
363 int localExtIndex = 0;
364 if (hasExt) {
365 int maxFilenameLen;
366 /* Translate extension, and store it in ext. */
367 for(Index = 0; Index<EXT_SIZE &&
368 extIndex + Index +1 < udfLen; Index++ ) {
369 current = udfName[extIndex + Index + 1];
370 if (IsIllegal(current) ||
371 !UnicodeIsPrint(current)) {
372 needsCRC = 1;
373 /* Replace Illegal and non-displayable
374 * chars with underscore.
375 */
376 current = ILLEGAL_CHAR_MARK;
377 /* Skip any other illegal or
378 * non-displayable characters.
379 */
380 while(Index + 1 < EXT_SIZE
381 && (IsIllegal(udfName[extIndex +
382 Index + 2]) ||
383 !isprint(udfName[extIndex +
384 Index + 2]))) {
385 Index++;
386 }
387 }
388 ext[localExtIndex++] = current;
389 }
390
391 /* Truncate filename to leave room for extension and
392 * CRC.
393 */
394 maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
395 if (newIndex > maxFilenameLen) {
396 newIndex = maxFilenameLen;
397 } else {
398 newIndex = newExtIndex;
399 }
400 } else if (newIndex > MAXLEN - 5) {
401 /*If no extension, make sure to leave room for CRC. */
402 newIndex = MAXLEN - 5;
403 }
404 newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
405
406 /*Calculate CRC from original filename from FileIdentifier. */
407 valueCRC = udf_unicode_cksum(udfName, udfLen);
408 /* Convert 16-bits of CRC to hex characters. */
409 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
410 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
411 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
412 newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
413
414 /* Place a translated extension at end, if found. */
415 if (hasExt) {
416 newName[newIndex++] = PERIOD;
417 for (Index = 0;Index < localExtIndex ;Index++ ) {
418 newName[newIndex++] = ext[Index];
419 }
420 }
421 }
422 return(newIndex);
423 }
424
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Tests whether a Unicode character occurs in a zero-terminated string
 * of ASCII characters, much like the standard C function strchr().
 * Used by the OS/2 and Windows version of IsIllegal, since all of the
 * illegal characters above 0x0020 are in the ASCII subset of Unicode.
 *
 * RETURN VALUE
 *
 *	Non-zero if the Unicode character is in the given ASCII string.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	unsigned char *cursor;

	for (cursor = string; *cursor != '\0'; cursor++) {
		/* Both operands are unsigned, so they compare by value. */
		if (*cursor == ch) {
			return(true);
		}
	}
	return(false);
}
#endif /* OS2 */
453
454 /***********************************************************************
455 * Decides whether the given character is illegal for a given OS.
456 *
457 * RETURN VALUE
458 *
459 * Non-zero if char is illegal.
460 */
461 int IsIllegal(unicode_t ch)
462 {
463 #ifdef MAC
464 /* Only illegal character on the MAC is the colon. */
465 if (ch == 0x003A) {
466 return(1);
467 } else {
468 return(0);
469 }
470
471 #elif defined UNIX
472 /* Illegal UNIX characters are NULL and slash. */
473 if (ch == 0x0000 || ch == 0x002F) {
474 return(1);
475 } else {
476 return(0);
477 }
478
479 #elif defined OS2 || defined WIN_95 || defined WIN_NT
480 /* Illegal char's for OS/2 according to WARP toolkit. */
481 if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
482 return(1);
483 } else {
484 return(0);
485 }
486 #endif
487 }
488 /* #endif*/ /* NEEDS_ISPRINT */
489
490