1 /* 2 * Copyright 2001-2002 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 7 #pragma ident "%Z%%M% %I% %E% SMI" 8 9 /* 10 * The contents of this file are subject to the Netscape Public 11 * License Version 1.1 (the "License"); you may not use this file 12 * except in compliance with the License. You may obtain a copy of 13 * the License at http://www.mozilla.org/NPL/ 14 * 15 * Software distributed under the License is distributed on an "AS 16 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 17 * implied. See the License for the specific language governing 18 * rights and limitations under the License. 19 * 20 * The Original Code is Mozilla Communicator client code, released 21 * March 31, 1998. 22 * 23 * The Initial Developer of the Original Code is Netscape 24 * Communications Corporation. Portions created by Netscape are 25 * Copyright (C) 1998-1999 Netscape Communications Corporation. All 26 * Rights Reserved. 27 * 28 * Contributor(s): 29 */ 30 31 /* line64.c - routines for dealing with the slapd line format */ 32 33 #include <stdio.h> 34 #include <string.h> 35 #include <stdlib.h> 36 #include <ctype.h> 37 #ifndef macintosh 38 #include <sys/types.h> 39 #endif 40 #ifdef _WIN32 41 #include <windows.h> 42 #elif !defined( macintosh ) 43 #include <sys/socket.h> 44 #endif 45 #include "ldaplog.h" 46 #include "ldif.h" 47 48 #ifndef isascii 49 #define isascii( c ) (!((c) & ~0177)) 50 #endif 51 52 #define RIGHT2 0x03 53 #define RIGHT4 0x0f 54 #define CONTINUED_LINE_MARKER '\001' 55 56 #define ISBLANK(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') /* not "\r\v\f" */ 57 58 #define LDIF_OPT_ISSET( value, opt ) (((value) & (opt)) != 0 ) 59 60 static char nib2b64[0x40] = 61 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 62 63 static unsigned char b642nib[0x80] = { 64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 69 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 70 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 71 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 72 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 73 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 74 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 75 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 76 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 77 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 78 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 79 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff 80 }; 81 82 static int ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen, 83 int lenused, int wraplen ); 84 85 /* 86 * str_parse_line - takes a line of the form "type:[:] value" and splits it 87 * into components "type" and "value". if a double colon separates type from 88 * value, then value is encoded in base 64, and parse_line un-decodes it 89 * (in place) before returning. 90 */ 91 92 int 93 str_parse_line( 94 char *line, 95 char **type, 96 char **value, 97 int *vlen 98 ) 99 { 100 char *p, *s, *d; 101 int b64; 102 103 /* skip any leading space */ 104 while ( ISBLANK( *line ) ) { 105 line++; 106 } 107 *type = line; 108 109 for ( s = line; *s && *s != ':'; s++ ) 110 ; /* NULL */ 111 if ( *s == '\0' ) { 112 113 /* Comment-out while we address calling libldif from ns-back-ldbm 114 on NT. 
1 of 3 */ 115 #if defined( _WIN32 ) 116 /* 117 #endif 118 LDAPDebug( LDAP_DEBUG_PARSE, "str_parse_line: missing ':' " 119 "on line \"%s\"\n", line, 0, 0 ); 120 #if defined( _WIN32 ) 121 */ 122 #endif 123 return( -1 ); 124 } 125 126 /* trim any space between type and : */ 127 for ( p = s - 1; p > line && ISBLANK( *p ); p-- ) { 128 *p = '\0'; 129 } 130 *s++ = '\0'; 131 132 /* check for double : - indicates base 64 encoded value */ 133 if ( *s == ':' ) { 134 s++; 135 b64 = 1; 136 137 /* single : - normally encoded value */ 138 } else { 139 b64 = 0; 140 } 141 142 /* skip space between : and value */ 143 while ( ISBLANK( *s ) ) { 144 s++; 145 } 146 147 /* 148 * If no value is present, return a zero-length string for 149 * *value, with *vlen set to zero. 150 */ 151 if ( *s == '\0' ) { 152 *value = s; 153 *vlen = 0; 154 return( 0 ); 155 } 156 157 /* check for continued line markers that should be deleted */ 158 for ( p = s, d = s; *p; p++ ) { 159 if ( *p != CONTINUED_LINE_MARKER ) 160 *d++ = *p; 161 } 162 *d = '\0'; 163 164 *value = s; 165 if ( b64 ) { 166 if (( *vlen = ldif_base64_decode( s, (unsigned char *)s )) 167 < 0 ) { 168 /* Comment-out while we address calling libldif from ns-back-ldbm 169 on NT. 3 of 3 */ 170 #if defined( _WIN32 ) 171 /* 172 #endif 173 LDAPDebug( LDAP_DEBUG_ANY, 174 "str_parse_line: invalid base 64 char on line \"%s\"\n", 175 line, 0, 0 ); 176 #if defined( _WIN32 ) 177 */ 178 #endif 179 return( -1 ); 180 } 181 s[ *vlen ] = '\0'; 182 } else { 183 *vlen = (int) (d - s); 184 } 185 186 return( 0 ); 187 } 188 189 190 /* 191 * ldif_base64_decode - take the BASE64-encoded characters in "src" 192 * (a zero-terminated string) and decode them into the the buffer "dst". 193 * "src" and "dst" can be the same if in-place decoding is desired. 194 * "dst" must be large enough to hold the decoded octets. No more than 195 * 3 * strlen( src ) / 4 bytes will be produced. 
196 * "dst" may contain zero octets anywhere within it, but it is not 197 * zero-terminated by this function. 198 * 199 * The number of bytes copied to "dst" is returned if all goes well. 200 * -1 is returned if the BASE64 encoding in "src" is invalid. 201 */ 202 203 int 204 ldif_base64_decode( char *src, unsigned char *dst ) 205 { 206 char *p, *stop; 207 unsigned char nib, *byte; 208 int i, len; 209 210 stop = strchr( src, '\0' ); 211 byte = dst; 212 for ( p = src, len = 0; p < stop; p += 4, len += 3 ) { 213 for ( i = 0; i < 4; i++ ) { 214 if ( p[i] != '=' && (p[i] & 0x80 || 215 b642nib[ p[i] & 0x7f ] > 0x3f) ) { 216 return( -1 ); 217 } 218 } 219 220 /* first digit */ 221 nib = b642nib[ p[0] & 0x7f ]; 222 byte[0] = nib << 2; 223 224 /* second digit */ 225 nib = b642nib[ p[1] & 0x7f ]; 226 byte[0] |= nib >> 4; 227 228 /* third digit */ 229 if ( p[2] == '=' ) { 230 len += 1; 231 break; 232 } 233 byte[1] = (nib & RIGHT4) << 4; 234 nib = b642nib[ p[2] & 0x7f ]; 235 byte[1] |= nib >> 2; 236 237 /* fourth digit */ 238 if ( p[3] == '=' ) { 239 len += 2; 240 break; 241 } 242 byte[2] = (nib & RIGHT2) << 6; 243 nib = b642nib[ p[3] & 0x7f ]; 244 byte[2] |= nib; 245 246 byte += 3; 247 } 248 249 return( len ); 250 } 251 252 /* 253 * str_getline - return the next "line" (minus newline) of input from a 254 * string buffer of lines separated by newlines, terminated by \n\n 255 * or \0. this routine handles continued lines, bundling them into 256 * a single big line before returning. if a line begins with a white 257 * space character, it is a continuation of the previous line. the white 258 * space character (nb: only one char), and preceeding newline are changed 259 * into CONTINUED_LINE_MARKER chars, to be deleted later by the 260 * str_parse_line() routine above. 261 * 262 * it takes a pointer to a pointer to the buffer on the first call, 263 * which it updates and must be supplied on subsequent calls. 
264 * 265 * XXX need to update this function to also support <CR><LF> as EOL. 266 * XXX supports <CR><LF> as of 07/29/1998 (richm) 267 */ 268 269 char * 270 str_getline( char **next ) 271 { 272 char *l; 273 char c; 274 char *p; 275 276 if ( *next == NULL || **next == '\n' || **next == '\0' ) { 277 return( NULL ); 278 } 279 280 while ( **next == '#' ) { /* skip comment lines */ 281 if (( *next = strchr( *next, '\n' )) == NULL ) { 282 return( NULL ); 283 } 284 (*next)++; 285 } 286 287 l = *next; 288 while ( (*next = strchr( *next, '\n' )) != NULL ) { 289 p = *next - 1; /* pointer to character previous to the newline */ 290 c = *(*next + 1); /* character after the newline */ 291 if ( ISBLANK( c ) && c != '\n' ) { 292 /* DOS EOL is \r\n, so if the character before */ 293 /* the \n is \r, continue it too */ 294 if (*p == '\r') 295 *p = CONTINUED_LINE_MARKER; 296 **next = CONTINUED_LINE_MARKER; 297 *(*next+1) = CONTINUED_LINE_MARKER; 298 } else { 299 /* DOS EOL is \r\n, so if the character before */ 300 /* the \n is \r, null it too */ 301 if (*p == '\r') 302 *p = '\0'; 303 *(*next)++ = '\0'; 304 break; 305 } 306 (*next)++; 307 } 308 309 return( l ); 310 } 311 312 313 #define LDIF_SAFE_CHAR( c ) ( (c) != '\r' && (c) != '\n' ) 314 #define LDIF_CONSERVATIVE_CHAR( c ) ( LDIF_SAFE_CHAR(c) && isascii((c)) \ 315 && ( isprint((c)) || (c) == '\t' )) 316 #define LDIF_SAFE_INITCHAR( c ) ( LDIF_SAFE_CHAR(c) && (c) != ':' \ 317 && (c) != ' ' && (c) != '<' ) 318 #define LDIF_CONSERVATIVE_INITCHAR( c ) ( LDIF_SAFE_INITCHAR( c ) && \ 319 ! 
( isascii((c)) && isspace((c)))) 320 #define LDIF_CONSERVATIVE_FINALCHAR( c ) ( (c) != ' ' ) 321 322 323 void 324 ldif_put_type_and_value_with_options( char **out, char *t, char *val, 325 int vlen, unsigned long options ) 326 { 327 unsigned char *p, *byte, *stop; 328 char *save; 329 int b64, len, savelen, wraplen; 330 len = 0; 331 332 if ( LDIF_OPT_ISSET( options, LDIF_OPT_NOWRAP )) { 333 wraplen = -1; 334 } else { 335 wraplen = LDIF_MAX_LINE_WIDTH; 336 } 337 338 /* put the type + ": " */ 339 for ( p = (unsigned char *) t; *p; p++, len++ ) { 340 *(*out)++ = *p; 341 } 342 *(*out)++ = ':'; 343 len++; 344 if ( LDIF_OPT_ISSET( options, LDIF_OPT_VALUE_IS_URL )) { 345 *(*out)++ = '<'; /* add '<' for URLs */ 346 len++; 347 } 348 save = *out; 349 savelen = len; 350 b64 = 0; 351 352 stop = (unsigned char *)val; 353 if ( val && vlen > 0 ) { 354 *(*out)++ = ' '; 355 stop = (unsigned char *) (val + vlen); 356 if ( LDIF_OPT_ISSET( options, LDIF_OPT_MINIMAL_ENCODING )) { 357 if ( !LDIF_SAFE_INITCHAR( val[0] )) { 358 b64 = 1; 359 } 360 } else { 361 if ( !LDIF_CONSERVATIVE_INITCHAR( val[0] ) || 362 !LDIF_CONSERVATIVE_FINALCHAR( val[vlen-1] )) { 363 b64 = 1; 364 } 365 } 366 } 367 368 if ( !b64 ) { 369 for ( byte = (unsigned char *) val; byte < stop; 370 byte++, len++ ) { 371 if ( LDIF_OPT_ISSET( options, 372 LDIF_OPT_MINIMAL_ENCODING )) { 373 if ( !LDIF_SAFE_CHAR( *byte )) { 374 b64 = 1; 375 break; 376 } 377 } else if ( !LDIF_CONSERVATIVE_CHAR( *byte )) { 378 b64 = 1; 379 break; 380 } 381 382 if ( wraplen != -1 && len > wraplen ) { 383 *(*out)++ = '\n'; 384 *(*out)++ = ' '; 385 len = 1; 386 } 387 *(*out)++ = *byte; 388 } 389 } 390 391 if ( b64 ) { 392 *out = save; 393 *(*out)++ = ':'; 394 *(*out)++ = ' '; 395 len = ldif_base64_encode_internal( (unsigned char *)val, *out, vlen, 396 savelen + 2, wraplen ); 397 *out += len; 398 } 399 400 *(*out)++ = '\n'; 401 } 402 403 void 404 ldif_put_type_and_value( char **out, char *t, char *val, int vlen ) 405 { 406 
ldif_put_type_and_value_with_options( out, t, val, vlen, 0 ); 407 } 408 409 void 410 ldif_put_type_and_value_nowrap( char **out, char *t, char *val, int vlen ) 411 { 412 ldif_put_type_and_value_with_options( out, t, val, vlen, LDIF_OPT_NOWRAP ); 413 } 414 415 /* 416 * ldif_base64_encode_internal - encode "srclen" bytes in "src", place BASE64 417 * encoded bytes in "dst" and return the length of the BASE64 418 * encoded string. "dst" is also zero-terminated by this function. 419 * 420 * If "lenused" >= 0, newlines will be included in "dst" and "lenused" if 421 * appropriate. "lenused" should be a count of characters already used 422 * on the current line. The LDIF lines we create will contain at most 423 * "wraplen" characters on each line, unless "wraplen" is -1, in which 424 * case output line length is unlimited. 425 * 426 * If "lenused" < 0, no newlines will be included, and the LDIF_BASE64_LEN() 427 * macro can be used to determine how many bytes will be placed in "dst." 428 */ 429 430 static int 431 ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen, int lenused, int wraplen ) 432 { 433 unsigned char *byte, *stop; 434 unsigned char buf[3]; 435 char *out; 436 unsigned long bits; 437 int i, pad, len; 438 439 len = 0; 440 out = dst; 441 stop = src + srclen; 442 443 /* convert to base 64 (3 bytes => 4 base 64 digits) */ 444 for ( byte = src; byte < stop - 2; byte += 3 ) { 445 bits = (byte[0] & 0xff) << 16; 446 bits |= (byte[1] & 0xff) << 8; 447 bits |= (byte[2] & 0xff); 448 449 for ( i = 0; i < 4; i++, bits <<= 6 ) { 450 if ( wraplen != -1 && lenused >= 0 && lenused++ > wraplen ) { 451 *out++ = '\n'; 452 *out++ = ' '; 453 lenused = 2; 454 } 455 456 /* get b64 digit from high order 6 bits */ 457 *out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ]; 458 } 459 } 460 461 /* add padding if necessary */ 462 if ( byte < stop ) { 463 for ( i = 0; byte + i < stop; i++ ) { 464 buf[i] = byte[i]; 465 } 466 for ( pad = 0; i < 3; i++, pad++ ) { 467 buf[i] = '\0'; 
468 } 469 byte = buf; 470 bits = (byte[0] & 0xff) << 16; 471 bits |= (byte[1] & 0xff) << 8; 472 bits |= (byte[2] & 0xff); 473 474 for ( i = 0; i < 4; i++, bits <<= 6 ) { 475 if ( wraplen != -1 && lenused >= 0 && lenused++ > wraplen ) { 476 *out++ = '\n'; 477 *out++ = ' '; 478 lenused = 2; 479 } 480 481 if (( i == 3 && pad > 0 ) || ( i == 2 && pad == 2 )) { 482 /* Pad as appropriate */ 483 *out++ = '='; 484 } else { 485 /* get b64 digit from low order 6 bits */ 486 *out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ]; 487 } 488 } 489 } 490 491 *out = '\0'; 492 493 return( out - dst ); 494 } 495 496 int 497 ldif_base64_encode( unsigned char *src, char *dst, int srclen, int lenused ) 498 { 499 return ldif_base64_encode_internal( src, dst, srclen, lenused, LDIF_MAX_LINE_WIDTH ); 500 } 501 502 int 503 ldif_base64_encode_nowrap( unsigned char *src, char *dst, int srclen, int lenused ) 504 { 505 return ldif_base64_encode_internal( src, dst, srclen, lenused, -1 ); 506 } 507 508 509 /* 510 * return malloc'd, zero-terminated LDIF line 511 */ 512 char * 513 ldif_type_and_value_with_options( char *type, char *val, int vlen, 514 unsigned long options ) 515 { 516 char *buf, *p; 517 int tlen; 518 519 tlen = strlen( type ); 520 if (( buf = (char *)malloc( LDIF_SIZE_NEEDED( tlen, vlen ) + 1 )) != 521 NULL ) { 522 p = buf; 523 ldif_put_type_and_value_with_options( &p, type, val, vlen, options ); 524 *p = '\0'; 525 } 526 527 return( buf ); 528 } 529 530 char * 531 ldif_type_and_value( char *type, char *val, int vlen ) 532 { 533 return ldif_type_and_value_with_options( type, val, vlen, 0 ); 534 } 535 536 char * 537 ldif_type_and_value_nowrap( char *type, char *val, int vlen ) 538 { 539 return ldif_type_and_value_with_options( type, val, vlen, LDIF_OPT_NOWRAP ); 540 } 541 542 /* 543 * ldif_get_entry - read the next ldif entry from the FILE referenced 544 * by fp. return a pointer to a malloc'd, null-terminated buffer. also 545 * returned is the last line number read, in *lineno. 
*
 * blank lines in front of the entry and lines that start with '#' are
 * skipped; the first blank line after some content ends the entry.
 * unless XP_WIN32 is defined, "\r\n" line endings are stored as "\n".
 * NULL is returned when no entry is left to read or when memory cannot
 * be allocated (nothing is leaked in that case).  lineno may be NULL;
 * otherwise *lineno is incremented once per input line, so the caller
 * is expected to initialize it.
 */
char *
ldif_get_entry( FILE *fp, int *lineno )
{
	char	line[BUFSIZ];
	char	*buf, *grown;
	int	max, cur, len, gotsome;
	int	at_line_start;	/* the next chunk read begins an input line */
	int	starts_line;	/* the current chunk begins an input line */
	int	in_comment;	/* discarding the rest of a long '#' line */

	buf = NULL;
	max = cur = gotsome = 0;
	at_line_start = 1;
	in_comment = 0;

	while ( fgets( line, sizeof(line), fp ) != NULL ) {
		/*
		 * fgets() hands back a line longer than the buffer in several
		 * chunks.  Only the chunk that begins an input line may be
		 * tested for "blank" or "comment"; a later chunk that happens
		 * to be just "\n" or to start with '#' is ordinary data.
		 */
		len = (int) strlen( line );
		starts_line = at_line_start;
		at_line_start = ( len == 0 || line[len-1] == '\n' );

		if ( starts_line ) {
			if ( lineno != NULL ) {
				(*lineno)++;
			}
			in_comment = 0;

			/* ldif entries are terminated by a \n on a line by itself */
			if ( line[0] == '\0' || line[0] == '\n'
#if !defined( XP_WIN32 )
			    || ( line[0] == '\r' && line[1] == '\n' ) /* DOS format */
#endif
			   ) {
				if ( gotsome ) {
					break;
				} else {
					continue;
				}
			} else if ( line[0] == '#' ) {
				/* keep discarding if the comment did not fit */
				in_comment = !at_line_start;
				continue;
			}
		} else if ( in_comment ) {
			continue;
		}

		gotsome = 1;
#if !defined( XP_WIN32 )
		/* DOS format */
		if ( len > 0 && line[len-1] == '\r' ) {
			--len;
			line[len] = '\0';
		} else if ( len > 1 && line[len-2] == '\r' && line[len-1] == '\n' ) {
			--len;
			line[len-1] = line[len];
			line[len] = '\0';
		}
#endif
		/* grow the entry buffer: BUFSIZ at first, doubling after that */
		while ( cur + (len + 1) > max ) {
			if ( buf == NULL ) {
				max += BUFSIZ;
			} else {
				max *= 2;
			}
			/* realloc( NULL, n ) behaves like malloc( n ) */
			grown = (char *) realloc( buf, max );
			if ( grown == NULL ) {
				/* realloc left the old block allocated: release it */
				free( buf );
				return( NULL );
			}
			buf = grown;
		}

		/* copy the terminating NUL too; the next chunk overwrites it */
		memcpy( buf + cur, line, len + 1 );
		cur += len;
	}

	return( buf );
}