1 /* 2 * Copyright 2001-2002 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* 7 * The contents of this file are subject to the Netscape Public 8 * License Version 1.1 (the "License"); you may not use this file 9 * except in compliance with the License. You may obtain a copy of 10 * the License at http://www.mozilla.org/NPL/ 11 * 12 * Software distributed under the License is distributed on an "AS 13 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 14 * implied. See the License for the specific language governing 15 * rights and limitations under the License. 16 * 17 * The Original Code is Mozilla Communicator client code, released 18 * March 31, 1998. 19 * 20 * The Initial Developer of the Original Code is Netscape 21 * Communications Corporation. Portions created by Netscape are 22 * Copyright (C) 1998-1999 Netscape Communications Corporation. All 23 * Rights Reserved. 24 * 25 * Contributor(s): 26 */ 27 28 /* line64.c - routines for dealing with the slapd line format */ 29 30 #include <stdio.h> 31 #include <string.h> 32 #include <stdlib.h> 33 #include <ctype.h> 34 #ifndef macintosh 35 #include <sys/types.h> 36 #endif 37 #ifdef _WIN32 38 #include <windows.h> 39 #elif !defined( macintosh ) 40 #include <sys/socket.h> 41 #endif 42 #include "ldaplog.h" 43 #include "ldif.h" 44 45 #ifndef isascii 46 #define isascii( c ) (!((c) & ~0177)) 47 #endif 48 49 #define RIGHT2 0x03 50 #define RIGHT4 0x0f 51 #define CONTINUED_LINE_MARKER '\001' 52 53 #define ISBLANK(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') /* not "\r\v\f" */ 54 55 #define LDIF_OPT_ISSET( value, opt ) (((value) & (opt)) != 0 ) 56 57 static char nib2b64[0x40] = 58 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 59 60 static unsigned char b642nib[0x80] = { 61 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 64 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 66 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 67 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 68 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 69 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 70 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 71 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 72 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 73 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 74 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 75 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 76 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff 77 }; 78 79 static int ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen, 80 int lenused, int wraplen ); 81 82 /* 83 * str_parse_line - takes a line of the form "type:[:] value" and splits it 84 * into components "type" and "value". if a double colon separates type from 85 * value, then value is encoded in base 64, and parse_line un-decodes it 86 * (in place) before returning. 87 */ 88 89 int 90 str_parse_line( 91 char *line, 92 char **type, 93 char **value, 94 int *vlen 95 ) 96 { 97 char *p, *s, *d; 98 int b64; 99 100 /* skip any leading space */ 101 while ( ISBLANK( *line ) ) { 102 line++; 103 } 104 *type = line; 105 106 for ( s = line; *s && *s != ':'; s++ ) 107 ; /* NULL */ 108 if ( *s == '\0' ) { 109 110 /* Comment-out while we address calling libldif from ns-back-ldbm 111 on NT. 
1 of 3 */ 112 #if defined( _WIN32 ) 113 /* 114 #endif 115 LDAPDebug( LDAP_DEBUG_PARSE, "str_parse_line: missing ':' " 116 "on line \"%s\"\n", line, 0, 0 ); 117 #if defined( _WIN32 ) 118 */ 119 #endif 120 return( -1 ); 121 } 122 123 /* trim any space between type and : */ 124 for ( p = s - 1; p > line && ISBLANK( *p ); p-- ) { 125 *p = '\0'; 126 } 127 *s++ = '\0'; 128 129 /* check for double : - indicates base 64 encoded value */ 130 if ( *s == ':' ) { 131 s++; 132 b64 = 1; 133 134 /* single : - normally encoded value */ 135 } else { 136 b64 = 0; 137 } 138 139 /* skip space between : and value */ 140 while ( ISBLANK( *s ) ) { 141 s++; 142 } 143 144 /* 145 * If no value is present, return a zero-length string for 146 * *value, with *vlen set to zero. 147 */ 148 if ( *s == '\0' ) { 149 *value = s; 150 *vlen = 0; 151 return( 0 ); 152 } 153 154 /* check for continued line markers that should be deleted */ 155 for ( p = s, d = s; *p; p++ ) { 156 if ( *p != CONTINUED_LINE_MARKER ) 157 *d++ = *p; 158 } 159 *d = '\0'; 160 161 *value = s; 162 if ( b64 ) { 163 if (( *vlen = ldif_base64_decode( s, (unsigned char *)s )) 164 < 0 ) { 165 /* Comment-out while we address calling libldif from ns-back-ldbm 166 on NT. 3 of 3 */ 167 #if defined( _WIN32 ) 168 /* 169 #endif 170 LDAPDebug( LDAP_DEBUG_ANY, 171 "str_parse_line: invalid base 64 char on line \"%s\"\n", 172 line, 0, 0 ); 173 #if defined( _WIN32 ) 174 */ 175 #endif 176 return( -1 ); 177 } 178 s[ *vlen ] = '\0'; 179 } else { 180 *vlen = (int) (d - s); 181 } 182 183 return( 0 ); 184 } 185 186 187 /* 188 * ldif_base64_decode - take the BASE64-encoded characters in "src" 189 * (a zero-terminated string) and decode them into the the buffer "dst". 190 * "src" and "dst" can be the same if in-place decoding is desired. 191 * "dst" must be large enough to hold the decoded octets. No more than 192 * 3 * strlen( src ) / 4 bytes will be produced. 
193 * "dst" may contain zero octets anywhere within it, but it is not 194 * zero-terminated by this function. 195 * 196 * The number of bytes copied to "dst" is returned if all goes well. 197 * -1 is returned if the BASE64 encoding in "src" is invalid. 198 */ 199 200 int 201 ldif_base64_decode( char *src, unsigned char *dst ) 202 { 203 char *p, *stop; 204 unsigned char nib, *byte; 205 int i, len; 206 207 stop = strchr( src, '\0' ); 208 byte = dst; 209 for ( p = src, len = 0; p < stop; p += 4, len += 3 ) { 210 for ( i = 0; i < 4; i++ ) { 211 if ( p[i] != '=' && (p[i] & 0x80 || 212 b642nib[ p[i] & 0x7f ] > 0x3f) ) { 213 return( -1 ); 214 } 215 } 216 217 /* first digit */ 218 nib = b642nib[ p[0] & 0x7f ]; 219 byte[0] = nib << 2; 220 221 /* second digit */ 222 nib = b642nib[ p[1] & 0x7f ]; 223 byte[0] |= nib >> 4; 224 225 /* third digit */ 226 if ( p[2] == '=' ) { 227 len += 1; 228 break; 229 } 230 byte[1] = (nib & RIGHT4) << 4; 231 nib = b642nib[ p[2] & 0x7f ]; 232 byte[1] |= nib >> 2; 233 234 /* fourth digit */ 235 if ( p[3] == '=' ) { 236 len += 2; 237 break; 238 } 239 byte[2] = (nib & RIGHT2) << 6; 240 nib = b642nib[ p[3] & 0x7f ]; 241 byte[2] |= nib; 242 243 byte += 3; 244 } 245 246 return( len ); 247 } 248 249 /* 250 * str_getline - return the next "line" (minus newline) of input from a 251 * string buffer of lines separated by newlines, terminated by \n\n 252 * or \0. this routine handles continued lines, bundling them into 253 * a single big line before returning. if a line begins with a white 254 * space character, it is a continuation of the previous line. the white 255 * space character (nb: only one char), and preceeding newline are changed 256 * into CONTINUED_LINE_MARKER chars, to be deleted later by the 257 * str_parse_line() routine above. 258 * 259 * it takes a pointer to a pointer to the buffer on the first call, 260 * which it updates and must be supplied on subsequent calls. 
261 * 262 * XXX need to update this function to also support <CR><LF> as EOL. 263 * XXX supports <CR><LF> as of 07/29/1998 (richm) 264 */ 265 266 char * 267 str_getline( char **next ) 268 { 269 char *l; 270 char c; 271 char *p; 272 273 if ( *next == NULL || **next == '\n' || **next == '\0' ) { 274 return( NULL ); 275 } 276 277 while ( **next == '#' ) { /* skip comment lines */ 278 if (( *next = strchr( *next, '\n' )) == NULL ) { 279 return( NULL ); 280 } 281 (*next)++; 282 } 283 284 l = *next; 285 while ( (*next = strchr( *next, '\n' )) != NULL ) { 286 p = *next - 1; /* pointer to character previous to the newline */ 287 c = *(*next + 1); /* character after the newline */ 288 if ( ISBLANK( c ) && c != '\n' ) { 289 /* DOS EOL is \r\n, so if the character before */ 290 /* the \n is \r, continue it too */ 291 if (*p == '\r') 292 *p = CONTINUED_LINE_MARKER; 293 **next = CONTINUED_LINE_MARKER; 294 *(*next+1) = CONTINUED_LINE_MARKER; 295 } else { 296 /* DOS EOL is \r\n, so if the character before */ 297 /* the \n is \r, null it too */ 298 if (*p == '\r') 299 *p = '\0'; 300 *(*next)++ = '\0'; 301 break; 302 } 303 (*next)++; 304 } 305 306 return( l ); 307 } 308 309 310 #define LDIF_SAFE_CHAR( c ) ( (c) != '\r' && (c) != '\n' ) 311 #define LDIF_CONSERVATIVE_CHAR( c ) ( LDIF_SAFE_CHAR(c) && isascii((c)) \ 312 && ( isprint((c)) || (c) == '\t' )) 313 #define LDIF_SAFE_INITCHAR( c ) ( LDIF_SAFE_CHAR(c) && (c) != ':' \ 314 && (c) != ' ' && (c) != '<' ) 315 #define LDIF_CONSERVATIVE_INITCHAR( c ) ( LDIF_SAFE_INITCHAR( c ) && \ 316 ! 
( isascii((c)) && isspace((c)))) 317 #define LDIF_CONSERVATIVE_FINALCHAR( c ) ( (c) != ' ' ) 318 319 320 void 321 ldif_put_type_and_value_with_options( char **out, char *t, char *val, 322 int vlen, unsigned long options ) 323 { 324 unsigned char *p, *byte, *stop; 325 char *save; 326 int b64, len, savelen, wraplen; 327 len = 0; 328 329 if ( LDIF_OPT_ISSET( options, LDIF_OPT_NOWRAP )) { 330 wraplen = -1; 331 } else { 332 wraplen = LDIF_MAX_LINE_WIDTH; 333 } 334 335 /* put the type + ": " */ 336 for ( p = (unsigned char *) t; *p; p++, len++ ) { 337 *(*out)++ = *p; 338 } 339 *(*out)++ = ':'; 340 len++; 341 if ( LDIF_OPT_ISSET( options, LDIF_OPT_VALUE_IS_URL )) { 342 *(*out)++ = '<'; /* add '<' for URLs */ 343 len++; 344 } 345 save = *out; 346 savelen = len; 347 b64 = 0; 348 349 stop = (unsigned char *)val; 350 if ( val && vlen > 0 ) { 351 *(*out)++ = ' '; 352 stop = (unsigned char *) (val + vlen); 353 if ( LDIF_OPT_ISSET( options, LDIF_OPT_MINIMAL_ENCODING )) { 354 if ( !LDIF_SAFE_INITCHAR( val[0] )) { 355 b64 = 1; 356 } 357 } else { 358 if ( !LDIF_CONSERVATIVE_INITCHAR( val[0] ) || 359 !LDIF_CONSERVATIVE_FINALCHAR( val[vlen-1] )) { 360 b64 = 1; 361 } 362 } 363 } 364 365 if ( !b64 ) { 366 for ( byte = (unsigned char *) val; byte < stop; 367 byte++, len++ ) { 368 if ( LDIF_OPT_ISSET( options, 369 LDIF_OPT_MINIMAL_ENCODING )) { 370 if ( !LDIF_SAFE_CHAR( *byte )) { 371 b64 = 1; 372 break; 373 } 374 } else if ( !LDIF_CONSERVATIVE_CHAR( *byte )) { 375 b64 = 1; 376 break; 377 } 378 379 if ( wraplen != -1 && len > wraplen ) { 380 *(*out)++ = '\n'; 381 *(*out)++ = ' '; 382 len = 1; 383 } 384 *(*out)++ = *byte; 385 } 386 } 387 388 if ( b64 ) { 389 *out = save; 390 *(*out)++ = ':'; 391 *(*out)++ = ' '; 392 len = ldif_base64_encode_internal( (unsigned char *)val, *out, vlen, 393 savelen + 2, wraplen ); 394 *out += len; 395 } 396 397 *(*out)++ = '\n'; 398 } 399 400 void 401 ldif_put_type_and_value( char **out, char *t, char *val, int vlen ) 402 { 403 
ldif_put_type_and_value_with_options( out, t, val, vlen, 0 ); 404 } 405 406 void 407 ldif_put_type_and_value_nowrap( char **out, char *t, char *val, int vlen ) 408 { 409 ldif_put_type_and_value_with_options( out, t, val, vlen, LDIF_OPT_NOWRAP ); 410 } 411 412 /* 413 * ldif_base64_encode_internal - encode "srclen" bytes in "src", place BASE64 414 * encoded bytes in "dst" and return the length of the BASE64 415 * encoded string. "dst" is also zero-terminated by this function. 416 * 417 * If "lenused" >= 0, newlines will be included in "dst" and "lenused" if 418 * appropriate. "lenused" should be a count of characters already used 419 * on the current line. The LDIF lines we create will contain at most 420 * "wraplen" characters on each line, unless "wraplen" is -1, in which 421 * case output line length is unlimited. 422 * 423 * If "lenused" < 0, no newlines will be included, and the LDIF_BASE64_LEN() 424 * macro can be used to determine how many bytes will be placed in "dst." 425 */ 426 427 static int 428 ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen, int lenused, int wraplen ) 429 { 430 unsigned char *byte, *stop; 431 unsigned char buf[3]; 432 char *out; 433 unsigned long bits; 434 int i, pad, len; 435 436 len = 0; 437 out = dst; 438 stop = src + srclen; 439 440 /* convert to base 64 (3 bytes => 4 base 64 digits) */ 441 for ( byte = src; byte < stop - 2; byte += 3 ) { 442 bits = (byte[0] & 0xff) << 16; 443 bits |= (byte[1] & 0xff) << 8; 444 bits |= (byte[2] & 0xff); 445 446 for ( i = 0; i < 4; i++, bits <<= 6 ) { 447 if ( wraplen != -1 && lenused >= 0 && lenused++ > wraplen ) { 448 *out++ = '\n'; 449 *out++ = ' '; 450 lenused = 2; 451 } 452 453 /* get b64 digit from high order 6 bits */ 454 *out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ]; 455 } 456 } 457 458 /* add padding if necessary */ 459 if ( byte < stop ) { 460 for ( i = 0; byte + i < stop; i++ ) { 461 buf[i] = byte[i]; 462 } 463 for ( pad = 0; i < 3; i++, pad++ ) { 464 buf[i] = '\0'; 
465 } 466 byte = buf; 467 bits = (byte[0] & 0xff) << 16; 468 bits |= (byte[1] & 0xff) << 8; 469 bits |= (byte[2] & 0xff); 470 471 for ( i = 0; i < 4; i++, bits <<= 6 ) { 472 if ( wraplen != -1 && lenused >= 0 && lenused++ > wraplen ) { 473 *out++ = '\n'; 474 *out++ = ' '; 475 lenused = 2; 476 } 477 478 if (( i == 3 && pad > 0 ) || ( i == 2 && pad == 2 )) { 479 /* Pad as appropriate */ 480 *out++ = '='; 481 } else { 482 /* get b64 digit from low order 6 bits */ 483 *out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ]; 484 } 485 } 486 } 487 488 *out = '\0'; 489 490 return( out - dst ); 491 } 492 493 int 494 ldif_base64_encode( unsigned char *src, char *dst, int srclen, int lenused ) 495 { 496 return ldif_base64_encode_internal( src, dst, srclen, lenused, LDIF_MAX_LINE_WIDTH ); 497 } 498 499 int 500 ldif_base64_encode_nowrap( unsigned char *src, char *dst, int srclen, int lenused ) 501 { 502 return ldif_base64_encode_internal( src, dst, srclen, lenused, -1 ); 503 } 504 505 506 /* 507 * return malloc'd, zero-terminated LDIF line 508 */ 509 char * 510 ldif_type_and_value_with_options( char *type, char *val, int vlen, 511 unsigned long options ) 512 { 513 char *buf, *p; 514 int tlen; 515 516 tlen = strlen( type ); 517 if (( buf = (char *)malloc( LDIF_SIZE_NEEDED( tlen, vlen ) + 1 )) != 518 NULL ) { 519 p = buf; 520 ldif_put_type_and_value_with_options( &p, type, val, vlen, options ); 521 *p = '\0'; 522 } 523 524 return( buf ); 525 } 526 527 char * 528 ldif_type_and_value( char *type, char *val, int vlen ) 529 { 530 return ldif_type_and_value_with_options( type, val, vlen, 0 ); 531 } 532 533 char * 534 ldif_type_and_value_nowrap( char *type, char *val, int vlen ) 535 { 536 return ldif_type_and_value_with_options( type, val, vlen, LDIF_OPT_NOWRAP ); 537 } 538 539 /* 540 * ldif_get_entry - read the next ldif entry from the FILE referenced 541 * by fp. return a pointer to a malloc'd, null-terminated buffer. also 542 * returned is the last line number read, in *lineno. 
/*
 * ldif_get_entry() details:
 *   fp     - stream to read from; reading stops after the blank line
 *            that ends the entry, or at end of input.
 *   lineno - if not NULL, *lineno is incremented once for every physical
 *            line consumed (including skipped blank and '#' lines).
 * Blank lines before the entry and lines beginning with '#' are skipped.
 * Unless XP_WIN32 is defined, a "\r\n" line ending is stored as "\n".
 * Lines longer than the internal BUFSIZ read buffer are reassembled.
 * Returns NULL if no entry was read or if memory could not be allocated
 * (nothing is leaked in that case).  The buffer comes from realloc().
 */
char *
ldif_get_entry( FILE *fp, int *lineno )
{
    char    line[BUFSIZ];
    char    *buf, *tmp;
    size_t  max, cur, len, newmax;
    int     gotsome;
    int     at_bol;         /* the next chunk read starts a physical line */
    int     starts_line;    /* the current chunk starts a physical line */
    int     in_comment;     /* the current physical line is a '#' comment */

    buf = NULL;
    max = cur = 0;
    gotsome = in_comment = 0;
    at_bol = 1;
    while ( fgets( line, sizeof(line), fp ) != NULL ) {
        len = strlen( line );
        starts_line = at_bol;
        if ( len > 0 ) {
            /* without a trailing newline the line continues in the next chunk */
            at_bol = ( line[len-1] == '\n' );
        }

        /*
         * Blank-line and comment detection only make sense at the start
         * of a physical line; a later chunk of an over-long line that
         * happens to begin with '\n' or '#' is ordinary data.
         */
        if ( starts_line ) {
            if ( lineno != NULL ) {
                (*lineno)++;
            }
            in_comment = 0;
            /* ldif entries are terminated by a \n on a line by itself */
            if ( line[0] == '\0' || line[0] == '\n'
#if !defined( XP_WIN32 )
                || ( line[0] == '\r' && line[1] == '\n' ) /* DOS format */
#endif
                ) {
                if ( gotsome ) {
                    break;
                }
                continue;
            }
            if ( line[0] == '#' ) {
                in_comment = 1;
            }
        }
        if ( in_comment ) {
            continue;   /* drop every chunk of a comment line */
        }
        gotsome = 1;
#if !defined( XP_WIN32 )
        /* DOS format */
        if ( len > 0 && line[len-1] == '\r' ) {
            line[--len] = '\0';
        } else if ( len > 1 && line[len-2] == '\r' && line[len-1] == '\n' ) {
            line[len-2] = '\n';
            line[--len] = '\0';
        }
#endif
        while ( cur + len + 1 > max ) {
            if ( max == 0 ) {
                newmax = BUFSIZ;
            } else if ( max > (size_t)-1 / 2 ) {
                /* doubling would overflow */
                free( buf );
                return( NULL );
            } else {
                newmax = max * 2;
            }
            /* realloc( NULL, n ) behaves like malloc( n ) */
            tmp = (char *) realloc( buf, newmax );
            if ( tmp == NULL ) {
                free( buf );    /* realloc left the old block allocated */
                return( NULL );
            }
            buf = tmp;
            max = newmax;
        }

        /* copy the chunk with its '\0'; the next chunk overwrites the '\0' */
        memcpy( buf + cur, line, len + 1 );
        cur += len;
    }

    return( buf );
}