1 /* 2 * Various routines from the OSTA 2.01 specs. Copyrights are included with 3 * each code segment. Slight whitespace modifications have been made for 4 * formatting purposes. Typos/bugs have been fixed. 5 * 6 * $FreeBSD$ 7 */ 8 9 #include <fs/udf/osta.h> 10 11 /*****************************************************************************/ 12 /*********************************************************************** 13 * OSTA compliant Unicode compression, uncompression routines. 14 * Copyright 1995 Micro Design International, Inc. 15 * Written by Jason M. Rinn. 16 * Micro Design International gives permission for the free use of the 17 * following source code. 18 */ 19 20 /*********************************************************************** 21 * Takes an OSTA CS0 compressed unicode name, and converts 22 * it to Unicode. 23 * The Unicode output will be in the byte order 24 * that the local compiler uses for 16-bit values. 25 * NOTE: This routine only performs error checking on the compID. 26 * It is up to the user to ensure that the unicode buffer is large 27 * enough, and that the compressed unicode name is correct. 28 * 29 * RETURN VALUE 30 * 31 * The number of unicode characters which were uncompressed. 32 * A -1 is returned if the compression ID is invalid. 33 */ 34 int 35 udf_UncompressUnicode( 36 int numberOfBytes, /* (Input) number of bytes read from media. */ 37 byte *UDFCompressed, /* (Input) bytes read from media. */ 38 unicode_t *unicode) /* (Output) uncompressed unicode characters. */ 39 { 40 unsigned int compID; 41 int returnValue, unicodeIndex, byteIndex; 42 43 /* Use UDFCompressed to store current byte being read. */ 44 compID = UDFCompressed[0]; 45 46 /* First check for valid compID. */ 47 if (compID != 8 && compID != 16) { 48 returnValue = -1; 49 } else { 50 unicodeIndex = 0; 51 byteIndex = 1; 52 53 /* Loop through all the bytes. */ 54 while (byteIndex < numberOfBytes) { 55 if (compID == 16) { 56 /* Move the first byte to the high bits of the 57 * unicode char. 58 */ 59 unicode[unicodeIndex] = 60 UDFCompressed[byteIndex++] << 8; 61 } else { 62 unicode[unicodeIndex] = 0; 63 } 64 if (byteIndex < numberOfBytes) { 65 /*Then the next byte to the low bits. */ 66 unicode[unicodeIndex] |= 67 UDFCompressed[byteIndex++]; 68 } 69 unicodeIndex++; 70 } 71 returnValue = unicodeIndex; 72 } 73 return(returnValue); 74 } 75 76 /* 77 * Almost same as udf_UncompressUnicode(). The difference is that 78 * it keeps byte order of unicode string. 79 */ 80 int 81 udf_UncompressUnicodeByte( 82 int numberOfBytes, /* (Input) number of bytes read from media. */ 83 byte *UDFCompressed, /* (Input) bytes read from media. */ 84 byte *unicode) /* (Output) uncompressed unicode characters. */ 85 { 86 unsigned int compID; 87 int returnValue, unicodeIndex, byteIndex; 88 89 /* Use UDFCompressed to store current byte being read. */ 90 compID = UDFCompressed[0]; 91 92 /* First check for valid compID. */ 93 if (compID != 8 && compID != 16) { 94 returnValue = -1; 95 } else { 96 unicodeIndex = 0; 97 byteIndex = 1; 98 99 /* Loop through all the bytes. */ 100 while (byteIndex < numberOfBytes) { 101 if (compID == 16) { 102 /* Move the first byte to the high bits of the 103 * unicode char. 104 */ 105 unicode[unicodeIndex++] = 106 UDFCompressed[byteIndex++]; 107 } else { 108 unicode[unicodeIndex++] = 0; 109 } 110 if (byteIndex < numberOfBytes) { 111 /*Then the next byte to the low bits. */ 112 unicode[unicodeIndex++] = 113 UDFCompressed[byteIndex++]; 114 } 115 } 116 returnValue = unicodeIndex; 117 } 118 return(returnValue); 119 } 120 121 /*********************************************************************** 122 * DESCRIPTION: 123 * Takes a string of unicode wide characters and returns an OSTA CS0 124 * compressed unicode string. The unicode MUST be in the byte order of 125 * the compiler in order to obtain correct results. Returns an error 126 * if the compression ID is invalid. 127 * 128 * NOTE: This routine assumes the implementation already knows, by 129 * the local environment, how many bits are appropriate and 130 * therefore does no checking to test if the input characters fit 131 * into that number of bits or not. 132 * 133 * RETURN VALUE 134 * 135 * The total number of bytes in the compressed OSTA CS0 string, 136 * including the compression ID. 137 * A -1 is returned if the compression ID is invalid. 138 */ 139 int 140 udf_CompressUnicode( 141 int numberOfChars, /* (Input) number of unicode characters. */ 142 int compID, /* (Input) compression ID to be used. */ 143 unicode_t *unicode, /* (Input) unicode characters to compress. */ 144 byte *UDFCompressed) /* (Output) compressed string, as bytes. */ 145 { 146 int byteIndex, unicodeIndex; 147 148 if (compID != 8 && compID != 16) { 149 byteIndex = -1; /* Unsupported compression ID ! */ 150 } else { 151 /* Place compression code in first byte. */ 152 UDFCompressed[0] = compID; 153 154 byteIndex = 1; 155 unicodeIndex = 0; 156 while (unicodeIndex < numberOfChars) { 157 if (compID == 16) { 158 /* First, place the high bits of the char 159 * into the byte stream. 160 */ 161 UDFCompressed[byteIndex++] = 162 (unicode[unicodeIndex] & 0xFF00) >> 8; 163 } 164 /*Then place the low bits into the stream. */ 165 UDFCompressed[byteIndex++] = 166 unicode[unicodeIndex] & 0x00FF; 167 unicodeIndex++; 168 } 169 } 170 return(byteIndex); 171 } 172 173 /*****************************************************************************/ 174 /* 175 * CRC 010041 176 */ 177 static unsigned short crc_table[256] = { 178 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, 179 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 180 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, 181 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, 182 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, 183 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, 184 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, 185 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 186 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, 187 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, 188 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, 189 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, 190 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, 191 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 192 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, 193 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, 194 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, 195 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, 196 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, 197 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, 198 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, 199 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 200 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, 201 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, 202 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, 203 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, 204 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, 205 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, 206 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 207 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, 208 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, 209 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 210 }; 211 212 unsigned short 213 udf_cksum(s, n) 214 unsigned char *s; 215 int n; 216 { 217 unsigned short crc=0; 218 219 while (n-- > 0) 220 crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8); 221 return crc; 222 } 223 224 /* UNICODE Checksum */ 225 unsigned short 226 udf_unicode_cksum(s, n) 227 unsigned short *s; 228 int n; 229 { 230 unsigned short crc=0; 231 232 while (n-- > 0) { 233 /* Take high order byte first--corresponds to a big endian 234 * byte stream. 235 */ 236 crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8); 237 crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8); 238 } 239 return crc; 240 } 241 242 #ifdef MAIN 243 unsigned char bytes[] = { 0x70, 0x6A, 0x77 }; 244 245 main() 246 { 247 unsigned short x; 248 x = cksum(bytes, sizeof bytes); 249 printf("checksum: calculated=%4.4x, correct=%4.4x\en", x, 0x3299); 250 exit(0); 251 } 252 #endif 253 254 /*****************************************************************************/ 255 #ifdef NEEDS_ISPRINT 256 /*********************************************************************** 257 * OSTA UDF compliant file name translation routine for OS/2, 258 * Windows 95, Windows NT, Macintosh and UNIX. 259 * Copyright 1995 Micro Design International, Inc. 260 * Written by Jason M. Rinn. 261 * Micro Design International gives permission for the free use of the 262 * following source code. 263 */ 264 265 /*********************************************************************** 266 * To use these routines with different operating systems. 267 * 268 * OS/2 269 * Define OS2 270 * Define MAXLEN = 254 271 * 272 * Windows 95 273 * Define WIN_95 274 * Define MAXLEN = 255 275 * 276 * Windows NT 277 * Define WIN_NT 278 * Define MAXLEN = 255 279 * 280 * Macintosh: 281 * Define MAC. 282 * Define MAXLEN = 31. 283 * 284 * UNIX 285 * Define UNIX. 286 * Define MAXLEN as specified by unix version. 287 */ 288 289 #define ILLEGAL_CHAR_MARK 0x005F 290 #define CRC_MARK 0x0023 291 #define EXT_SIZE 5 292 #define TRUE 1 293 #define FALSE 0 294 #define PERIOD 0x002E 295 #define SPACE 0x0020 296 297 /*** PROTOTYPES ***/ 298 int IsIllegal(unicode_t ch); 299 300 /* Define a function or macro which determines if a Unicode character is 301 * printable under your implementation. 302 */ 303 int UnicodeIsPrint(unicode_t); 304 305 /*********************************************************************** 306 * Translates a long file name to one using a MAXLEN and an illegal 307 * char set in accord with the OSTA requirements. Assumes the name has 308 * already been translated to Unicode. 309 * 310 * RETURN VALUE 311 * 312 * Number of unicode characters in translated name. 313 */ 314 int UDFTransName( 315 unicode_t *newName, /* (Output)Translated name. Must be of length 316 * MAXLEN */ 317 unicode_t *udfName, /* (Input) Name from UDF volume.*/ 318 int udfLen) /* (Input) Length of UDF Name. */ 319 { 320 int index, newIndex = 0, needsCRC = FALSE; 321 int extIndex = 0, newExtIndex = 0, hasExt = FALSE; 322 #if defined OS2 || defined WIN_95 || defined WIN_NT 323 int trailIndex = 0; 324 #endif 325 unsigned short valueCRC; 326 unicode_t current; 327 const char hexChar[] = "0123456789ABCDEF"; 328 329 for (index = 0; index < udfLen; index++) { 330 current = udfName[index]; 331 332 if (IsIllegal(current) || !UnicodeIsPrint(current)) { 333 needsCRC = TRUE; 334 /* Replace Illegal and non-displayable chars with 335 * underscore. 336 */ 337 current = ILLEGAL_CHAR_MARK; 338 /* Skip any other illegal or non-displayable 339 * characters. 340 */ 341 while(index+1 < udfLen && (IsIllegal(udfName[index+1]) 342 || !UnicodeIsPrint(udfName[index+1]))) { 343 index++; 344 } 345 } 346 347 /* Record position of extension, if one is found. */ 348 if (current == PERIOD && (udfLen - index -1) <= EXT_SIZE) { 349 if (udfLen == index + 1) { 350 /* A trailing period is NOT an extension. */ 351 hasExt = FALSE; 352 } else { 353 hasExt = TRUE; 354 extIndex = index; 355 newExtIndex = newIndex; 356 } 357 } 358 359 #if defined OS2 || defined WIN_95 || defined WIN_NT 360 /* Record position of last char which is NOT period or space. */ 361 else if (current != PERIOD && current != SPACE) { 362 trailIndex = newIndex; 363 } 364 #endif 365 366 if (newIndex < MAXLEN) { 367 newName[newIndex++] = current; 368 } else { 369 needsCRC = TRUE; 370 } 371 } 372 373 #if defined OS2 || defined WIN_95 || defined WIN_NT 374 /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */ 375 if (trailIndex != newIndex - 1) { 376 newIndex = trailIndex + 1; 377 needsCRC = TRUE; 378 hasExt = FALSE; /* Trailing period does not make an 379 * extension. */ 380 } 381 #endif 382 383 if (needsCRC) { 384 unicode_t ext[EXT_SIZE]; 385 int localExtIndex = 0; 386 if (hasExt) { 387 int maxFilenameLen; 388 /* Translate extension, and store it in ext. */ 389 for(index = 0; index<EXT_SIZE && 390 extIndex + index +1 < udfLen; index++ ) { 391 current = udfName[extIndex + index + 1]; 392 if (IsIllegal(current) || 393 !UnicodeIsPrint(current)) { 394 needsCRC = 1; 395 /* Replace Illegal and non-displayable 396 * chars with underscore. 397 */ 398 current = ILLEGAL_CHAR_MARK; 399 /* Skip any other illegal or 400 * non-displayable characters. 401 */ 402 while(index + 1 < EXT_SIZE 403 && (IsIllegal(udfName[extIndex + 404 index + 2]) || 405 !isprint(udfName[extIndex + 406 index + 2]))) { 407 index++; 408 } 409 } 410 ext[localExtIndex++] = current; 411 } 412 413 /* Truncate filename to leave room for extension and 414 * CRC. 415 */ 416 maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1); 417 if (newIndex > maxFilenameLen) { 418 newIndex = maxFilenameLen; 419 } else { 420 newIndex = newExtIndex; 421 } 422 } else if (newIndex > MAXLEN - 5) { 423 /*If no extension, make sure to leave room for CRC. */ 424 newIndex = MAXLEN - 5; 425 } 426 newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */ 427 428 /*Calculate CRC from original filename from FileIdentifier. */ 429 valueCRC = udf_unicode_cksum(udfName, udfLen); 430 /* Convert 16-bits of CRC to hex characters. */ 431 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; 432 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; 433 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; 434 newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; 435 436 /* Place a translated extension at end, if found. */ 437 if (hasExt) { 438 newName[newIndex++] = PERIOD; 439 for (index = 0;index < localExtIndex ;index++ ) { 440 newName[newIndex++] = ext[index]; 441 } 442 } 443 } 444 return(newIndex); 445 } 446 447 #if defined OS2 || defined WIN_95 || defined WIN_NT 448 /*********************************************************************** 449 * Decides if a Unicode character matches one of a list 450 * of ASCII characters. 451 * Used by OS2 version of IsIllegal for readability, since all of the 452 * illegal characters above 0x0020 are in the ASCII subset of Unicode. 453 * Works very similarly to the standard C function strchr(). 454 * 455 * RETURN VALUE 456 * 457 * Non-zero if the Unicode character is in the given ASCII string. 458 */ 459 int UnicodeInString( 460 unsigned char *string, /* (Input) String to search through. */ 461 unicode_t ch) /* (Input) Unicode char to search for. */ 462 { 463 int found = FALSE; 464 while (*string != '\0' && found == FALSE) { 465 /* These types should compare, since both are unsigned 466 * numbers. */ 467 if (*string == ch) { 468 found = TRUE; 469 } 470 string++; 471 } 472 return(found); 473 } 474 #endif /* OS2 */ 475 476 /*********************************************************************** 477 * Decides whether the given character is illegal for a given OS. 478 * 479 * RETURN VALUE 480 * 481 * Non-zero if char is illegal. 482 */ 483 int IsIllegal(unicode_t ch) 484 { 485 #ifdef MAC 486 /* Only illegal character on the MAC is the colon. */ 487 if (ch == 0x003A) { 488 return(1); 489 } else { 490 return(0); 491 } 492 493 #elif defined UNIX 494 /* Illegal UNIX characters are NULL and slash. */ 495 if (ch == 0x0000 || ch == 0x002F) { 496 return(1); 497 } else { 498 return(0); 499 } 500 501 #elif defined OS2 || defined WIN_95 || defined WIN_NT 502 /* Illegal char's for OS/2 according to WARP toolkit. */ 503 if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) { 504 return(1); 505 } else { 506 return(0); 507 } 508 #endif 509 } 510 #endif 511