xref: /illumos-gate/usr/src/lib/libldap5/sources/ldap/util/line64.c (revision 533affcbc7fc4d0c8132976ea454aaa715fe2307)
1 /*
2  * Copyright 2001-2002 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * The contents of this file are subject to the Netscape Public
8  * License Version 1.1 (the "License"); you may not use this file
9  * except in compliance with the License. You may obtain a copy of
10  * the License at http://www.mozilla.org/NPL/
11  *
12  * Software distributed under the License is distributed on an "AS
13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
14  * implied. See the License for the specific language governing
15  * rights and limitations under the License.
16  *
17  * The Original Code is Mozilla Communicator client code, released
18  * March 31, 1998.
19  *
20  * The Initial Developer of the Original Code is Netscape
21  * Communications Corporation. Portions created by Netscape are
22  * Copyright (C) 1998-1999 Netscape Communications Corporation. All
23  * Rights Reserved.
24  *
25  * Contributor(s):
26  */
27 
28 /* line64.c - routines for dealing with the slapd line format */
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <ctype.h>
34 #ifndef macintosh
35 #include <sys/types.h>
36 #endif
37 #ifdef _WIN32
38 #include <windows.h>
39 #elif !defined( macintosh )
40 #include <sys/socket.h>
41 #endif
42 #include "ldaplog.h"
43 #include "ldif.h"
44 
#ifndef isascii
/* fallback when <ctype.h> supplies no isascii(): true if no bit above the low seven is set */
#define isascii( c )	(!((c) & ~0177))
#endif

/* masks used by ldif_base64_decode() to carry leftover bits between digits */
#define RIGHT2			0x03	/* low-order 2 bits */
#define RIGHT4			0x0f	/* low-order 4 bits */
/*
 * byte written by str_getline() over the line break (and the single
 * leading white space character) of a continued line; str_parse_line()
 * deletes these bytes from the value.
 */
#define CONTINUED_LINE_MARKER	'\001'

/* true for space, tab and newline; evaluates its argument up to three times */
#define ISBLANK(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') /* not "\r\v\f" */

/* true when any bit of "opt" is set in "value" */
#define LDIF_OPT_ISSET( value, opt )	(((value) & (opt)) != 0 )
56 
/*
 * nib2b64 - maps a 6-bit value (0..63) to its BASE64 digit.  The array
 * holds exactly 0x40 characters, so the string literal's terminating
 * zero is not stored.
 */
static char nib2b64[0x40] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * b642nib - maps a 7-bit character code to its 6-bit BASE64 value.
 * Entries of 0xff mark characters that are not BASE64 digits; note
 * that the padding character '=' is one of them.
 */
static unsigned char b642nib[0x80] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
	0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
	0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
	0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
	0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
	0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
	0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff
};
78 
/* forward declaration: the encoder is defined further down but called earlier in this file */
static int ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen,
	int lenused, int wraplen );
81 
82 /*
83  * str_parse_line - takes a line of the form "type:[:] value" and splits it
84  * into components "type" and "value".  if a double colon separates type from
85  * value, then value is encoded in base 64, and parse_line un-decodes it
86  * (in place) before returning.
87  */
88 
89 int
90 str_parse_line(
91     char	*line,
92     char	**type,
93     char	**value,
94     int		*vlen
95 )
96 {
97 	char	*p, *s, *d;
98 	int	b64;
99 
100 	/* skip any leading space */
101 	while ( ISBLANK( *line ) ) {
102 		line++;
103 	}
104 	*type = line;
105 
106 	for ( s = line; *s && *s != ':'; s++ )
107 		;	/* NULL */
108 	if ( *s == '\0' ) {
109 
110 		/* Comment-out while we address calling libldif from ns-back-ldbm
111 			on NT. 1 of 3 */
112 #if defined( _WIN32 )
113 		/*
114 #endif
115 		 LDAPDebug( LDAP_DEBUG_PARSE, "str_parse_line: missing ':' "
116 			"on line \"%s\"\n", line, 0, 0 );
117 #if defined( _WIN32 )
118 		*/
119 #endif
120 		return( -1 );
121 	}
122 
123 	/* trim any space between type and : */
124 	for ( p = s - 1; p > line && ISBLANK( *p ); p-- ) {
125 		*p = '\0';
126 	}
127 	*s++ = '\0';
128 
129 	/* check for double : - indicates base 64 encoded value */
130 	if ( *s == ':' ) {
131 		s++;
132 		b64 = 1;
133 
134 	/* single : - normally encoded value */
135 	} else {
136 		b64 = 0;
137 	}
138 
139 	/* skip space between : and value */
140 	while ( ISBLANK( *s ) ) {
141 		s++;
142 	}
143 
144 	/*
145 	 * If no value is present, return a zero-length string for
146 	 * *value, with *vlen set to zero.
147 	 */
148 	if ( *s == '\0' ) {
149 		*value = s;
150 		*vlen = 0;
151 		return( 0 );
152 	}
153 
154 	/* check for continued line markers that should be deleted */
155 	for ( p = s, d = s; *p; p++ ) {
156 		if ( *p != CONTINUED_LINE_MARKER )
157 			*d++ = *p;
158 	}
159 	*d = '\0';
160 
161 	*value = s;
162 	if ( b64 ) {
163 		if (( *vlen = ldif_base64_decode( s, (unsigned char *)s ))
164 		    < 0 ) {
165 			/* Comment-out while we address calling libldif from ns-back-ldbm
166 				on NT. 3 of 3 */
167 #if defined( _WIN32 )
168 		/*
169 #endif
170 			 LDAPDebug( LDAP_DEBUG_ANY,
171 			    "str_parse_line: invalid base 64 char on line \"%s\"\n",
172 			    line, 0, 0 );
173 #if defined( _WIN32 )
174 		*/
175 #endif
176 			return( -1 );
177 		}
178 		s[ *vlen ] = '\0';
179 	} else {
180 		*vlen = (int) (d - s);
181 	}
182 
183 	return( 0 );
184 }
185 
186 
187 /*
188  * ldif_base64_decode - take the BASE64-encoded characters in "src"
189  * (a zero-terminated string) and decode them into the the buffer "dst".
190  * "src" and "dst" can be the same if in-place decoding is desired.
191  * "dst" must be large enough to hold the decoded octets.  No more than
192  *	3 * strlen( src ) / 4 bytes will be produced.
193  * "dst" may contain zero octets anywhere within it, but it is not
194  *	zero-terminated by this function.
195  *
196  * The number of bytes copied to "dst" is returned if all goes well.
197  * -1 is returned if the BASE64 encoding in "src" is invalid.
198  */
199 
200 int
201 ldif_base64_decode( char *src, unsigned char *dst )
202 {
203 	char		*p, *stop;
204 	unsigned char	nib, *byte;
205 	int		i, len;
206 
207 	stop = strchr( src, '\0' );
208 	byte = dst;
209 	for ( p = src, len = 0; p < stop; p += 4, len += 3 ) {
210 		for ( i = 0; i < 4; i++ ) {
211 			if ( p[i] != '=' && (p[i] & 0x80 ||
212 			    b642nib[ p[i] & 0x7f ] > 0x3f) ) {
213 				return( -1 );
214 			}
215 		}
216 
217 		/* first digit */
218 		nib = b642nib[ p[0] & 0x7f ];
219 		byte[0] = nib << 2;
220 
221 		/* second digit */
222 		nib = b642nib[ p[1] & 0x7f ];
223 		byte[0] |= nib >> 4;
224 
225 		/* third digit */
226 		if ( p[2] == '=' ) {
227 			len += 1;
228 			break;
229 		}
230 		byte[1] = (nib & RIGHT4) << 4;
231 		nib = b642nib[ p[2] & 0x7f ];
232 		byte[1] |= nib >> 2;
233 
234 		/* fourth digit */
235 		if ( p[3] == '=' ) {
236 			len += 2;
237 			break;
238 		}
239 		byte[2] = (nib & RIGHT2) << 6;
240 		nib = b642nib[ p[3] & 0x7f ];
241 		byte[2] |= nib;
242 
243 		byte += 3;
244 	}
245 
246 	return( len );
247 }
248 
249 /*
250  * str_getline - return the next "line" (minus newline) of input from a
251  * string buffer of lines separated by newlines, terminated by \n\n
252  * or \0.  this routine handles continued lines, bundling them into
253  * a single big line before returning.  if a line begins with a white
254  * space character, it is a continuation of the previous line. the white
255  * space character (nb: only one char), and preceeding newline are changed
256  * into CONTINUED_LINE_MARKER chars, to be deleted later by the
257  * str_parse_line() routine above.
258  *
259  * it takes a pointer to a pointer to the buffer on the first call,
260  * which it updates and must be supplied on subsequent calls.
261  *
262  * XXX need to update this function to also support <CR><LF> as EOL.
263  * XXX supports <CR><LF> as of 07/29/1998 (richm)
264  */
265 
266 char *
267 str_getline( char **next )
268 {
269 	char	*l;
270 	char	c;
271 	char	*p;
272 
273 	if ( *next == NULL || **next == '\n' || **next == '\0' ) {
274 		return( NULL );
275 	}
276 
277 	while ( **next == '#' ) {	/* skip comment lines */
278 		if (( *next = strchr( *next, '\n' )) == NULL ) {
279 			return( NULL );
280 		}
281 		(*next)++;
282 	}
283 
284 	l = *next;
285 	while ( (*next = strchr( *next, '\n' )) != NULL ) {
286 		p = *next - 1; /* pointer to character previous to the newline */
287 		c = *(*next + 1); /* character after the newline */
288 		if ( ISBLANK( c ) && c != '\n' ) {
289 			/* DOS EOL is \r\n, so if the character before */
290 			/* the \n is \r, continue it too */
291 			if (*p == '\r')
292 				*p = CONTINUED_LINE_MARKER;
293 			**next = CONTINUED_LINE_MARKER;
294 			*(*next+1) = CONTINUED_LINE_MARKER;
295 		} else {
296 			/* DOS EOL is \r\n, so if the character before */
297 			/* the \n is \r, null it too */
298 			if (*p == '\r')
299 				*p = '\0';
300 			*(*next)++ = '\0';
301 			break;
302 		}
303 		(*next)++;
304 	}
305 
306 	return( l );
307 }
308 
309 
/*
 * Character classes used by ldif_put_type_and_value_with_options() to
 * decide whether a value can be written as plain text or has to be
 * BASE64 encoded.  Each macro may evaluate its argument several times.
 */
/* any character except CR and LF */
#define LDIF_SAFE_CHAR( c )		( (c) != '\r' && (c) != '\n' )
/* a safe character that is ASCII and either printable or a tab */
#define LDIF_CONSERVATIVE_CHAR( c )	( LDIF_SAFE_CHAR(c) && isascii((c)) \
					 && ( isprint((c)) || (c) == '\t' ))
/* a safe character that is also allowed to start a value: not ':', ' ' or '<' */
#define LDIF_SAFE_INITCHAR( c )		( LDIF_SAFE_CHAR(c) && (c) != ':' \
					 && (c) != ' ' && (c) != '<' )
/* a safe initial character that is not ASCII white space */
#define LDIF_CONSERVATIVE_INITCHAR( c ) ( LDIF_SAFE_INITCHAR( c ) && \
					 ! ( isascii((c)) && isspace((c))))
/* any character except a space may end a plain-text value */
#define LDIF_CONSERVATIVE_FINALCHAR( c ) ( (c) != ' ' )
318 
319 
320 void
321 ldif_put_type_and_value_with_options( char **out, char *t, char *val,
322 	int vlen, unsigned long options )
323 {
324 	unsigned char	*p, *byte, *stop;
325 	char		*save;
326 	int		b64, len, savelen, wraplen;
327 	len = 0;
328 
329 	if ( LDIF_OPT_ISSET( options, LDIF_OPT_NOWRAP )) {
330 		wraplen = -1;
331 	} else {
332 		wraplen = LDIF_MAX_LINE_WIDTH;
333 	}
334 
335 	/* put the type + ": " */
336 	for ( p = (unsigned char *) t; *p; p++, len++ ) {
337 		*(*out)++ = *p;
338 	}
339 	*(*out)++ = ':';
340 	len++;
341 	if ( LDIF_OPT_ISSET( options, LDIF_OPT_VALUE_IS_URL )) {
342 		*(*out)++ = '<';	/* add '<' for URLs */
343 		len++;
344 	}
345 	save = *out;
346 	savelen = len;
347 	b64 = 0;
348 
349 	stop = (unsigned char *)val;
350 	if ( val && vlen > 0 ) {
351 		*(*out)++ = ' ';
352 		stop = (unsigned char *) (val + vlen);
353 		if ( LDIF_OPT_ISSET( options, LDIF_OPT_MINIMAL_ENCODING )) {
354 			if ( !LDIF_SAFE_INITCHAR( val[0] )) {
355 				b64 = 1;
356 			}
357 		} else {
358 			if ( !LDIF_CONSERVATIVE_INITCHAR( val[0] ) ||
359 				 !LDIF_CONSERVATIVE_FINALCHAR( val[vlen-1] )) {
360 				b64 = 1;
361 			}
362 		}
363 	}
364 
365 	if ( !b64 ) {
366 		for ( byte = (unsigned char *) val; byte < stop;
367 		    byte++, len++ ) {
368 			if ( LDIF_OPT_ISSET( options,
369 			    LDIF_OPT_MINIMAL_ENCODING )) {
370 				if ( !LDIF_SAFE_CHAR( *byte )) {
371 					b64 = 1;
372 					break;
373 				}
374 			} else if ( !LDIF_CONSERVATIVE_CHAR( *byte )) {
375 				b64 = 1;
376 				break;
377 			}
378 
379 			if ( wraplen != -1 && len > wraplen ) {
380 				*(*out)++ = '\n';
381 				*(*out)++ = ' ';
382 				len = 1;
383 			}
384 			*(*out)++ = *byte;
385 		}
386 	}
387 
388 	if ( b64 ) {
389 		*out = save;
390 		*(*out)++ = ':';
391 		*(*out)++ = ' ';
392 		len = ldif_base64_encode_internal( (unsigned char *)val, *out, vlen,
393 		    savelen + 2, wraplen );
394 		*out += len;
395 	}
396 
397 	*(*out)++ = '\n';
398 }
399 
/*
 * ldif_put_type_and_value - ldif_put_type_and_value_with_options() with
 * no option bits set.
 */
void
ldif_put_type_and_value( char **out, char *t, char *val, int vlen )
{
    unsigned long	no_options = 0;

    ldif_put_type_and_value_with_options( out, t, val, vlen, no_options );
}
405 
406 void
407 ldif_put_type_and_value_nowrap( char **out, char *t, char *val, int vlen )
408 {
409     ldif_put_type_and_value_with_options( out, t, val, vlen, LDIF_OPT_NOWRAP );
410 }
411 
412 /*
413  * ldif_base64_encode_internal - encode "srclen" bytes in "src", place BASE64
414  * encoded bytes in "dst" and return the length of the BASE64
415  * encoded string.  "dst" is also zero-terminated by this function.
416  *
417  * If "lenused" >= 0, newlines will be included in "dst" and "lenused" if
418  * appropriate.  "lenused" should be a count of characters already used
419  * on the current line.  The LDIF lines we create will contain at most
420  * "wraplen" characters on each line, unless "wraplen" is -1, in which
421  * case output line length is unlimited.
422  *
423  * If "lenused" < 0, no newlines will be included, and the LDIF_BASE64_LEN()
424  * macro can be used to determine how many bytes will be placed in "dst."
425  */
426 
427 static int
428 ldif_base64_encode_internal( unsigned char *src, char *dst, int srclen, int lenused, int wraplen )
429 {
430 	unsigned char	*byte, *stop;
431 	unsigned char	buf[3];
432 	char		*out;
433 	unsigned long	bits;
434 	int		i, pad, len;
435 
436 	len = 0;
437 	out = dst;
438 	stop = src + srclen;
439 
440 	/* convert to base 64 (3 bytes => 4 base 64 digits) */
441 	for ( byte = src; byte < stop - 2; byte += 3 ) {
442 		bits = (byte[0] & 0xff) << 16;
443 		bits |= (byte[1] & 0xff) << 8;
444 		bits |= (byte[2] & 0xff);
445 
446 		for ( i = 0; i < 4; i++, bits <<= 6 ) {
447 			if ( wraplen != -1 &&  lenused >= 0 && lenused++ > wraplen ) {
448 				*out++ = '\n';
449 				*out++ = ' ';
450 				lenused = 2;
451 			}
452 
453 			/* get b64 digit from high order 6 bits */
454 			*out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ];
455 		}
456 	}
457 
458 	/* add padding if necessary */
459 	if ( byte < stop ) {
460 		for ( i = 0; byte + i < stop; i++ ) {
461 			buf[i] = byte[i];
462 		}
463 		for ( pad = 0; i < 3; i++, pad++ ) {
464 			buf[i] = '\0';
465 		}
466 		byte = buf;
467 		bits = (byte[0] & 0xff) << 16;
468 		bits |= (byte[1] & 0xff) << 8;
469 		bits |= (byte[2] & 0xff);
470 
471 		for ( i = 0; i < 4; i++, bits <<= 6 ) {
472 			if ( wraplen != -1 && lenused >= 0 && lenused++ > wraplen ) {
473 				*out++ = '\n';
474 				*out++ = ' ';
475 				lenused = 2;
476 			}
477 
478 			if (( i == 3 && pad > 0 ) || ( i == 2 && pad == 2 )) {
479 				/* Pad as appropriate */
480 				*out++ = '=';
481 			} else {
482 				/* get b64 digit from low order 6 bits */
483 				*out++ = nib2b64[ (bits & 0xfc0000L) >> 18 ];
484 			}
485 		}
486 	}
487 
488 	*out = '\0';
489 
490 	return( out - dst );
491 }
492 
493 int
494 ldif_base64_encode( unsigned char *src, char *dst, int srclen, int lenused )
495 {
496     return ldif_base64_encode_internal( src, dst, srclen, lenused, LDIF_MAX_LINE_WIDTH );
497 }
498 
/*
 * ldif_base64_encode_nowrap - BASE64 encode "srclen" bytes of "src" into
 * "dst" without ever breaking the output line.  Returns the encoded
 * length.
 */
int
ldif_base64_encode_nowrap( unsigned char *src, char *dst, int srclen, int lenused )
{
    int		enclen;

    enclen = ldif_base64_encode_internal( src, dst, srclen, lenused, -1 );
    return( enclen );
}
504 
505 
/*
 * ldif_type_and_value_with_options - build the LDIF line for "type" and
 * the "vlen" bytes at "val" in a freshly malloc'd, zero-terminated
 * buffer sized with LDIF_SIZE_NEEDED().  "options" is passed through to
 * ldif_put_type_and_value_with_options().
 *
 * Returns the buffer, which the caller must free, or NULL if the
 * allocation failed.
 */
char *
ldif_type_and_value_with_options( char *type, char *val, int vlen,
	unsigned long options )
{
    char	*line, *end;
    int		typelen;

    typelen = strlen( type );
    line = (char *)malloc( LDIF_SIZE_NEEDED( typelen, vlen ) + 1 );
    if ( line == NULL ) {
	return( NULL );
    }

    /* "end" is moved along by the put routine to just past the output */
    end = line;
    ldif_put_type_and_value_with_options( &end, type, val, vlen, options );
    *end = '\0';

    return( line );
}
526 
/*
 * ldif_type_and_value - ldif_type_and_value_with_options() with no option
 * bits set.  Returns a malloc'd line or NULL.
 */
char *
ldif_type_and_value( char *type, char *val, int vlen )
{
    unsigned long	no_options = 0;

    return( ldif_type_and_value_with_options( type, val, vlen, no_options ));
}
532 
533 char *
534 ldif_type_and_value_nowrap( char *type, char *val, int vlen )
535 {
536     return ldif_type_and_value_with_options( type, val, vlen, LDIF_OPT_NOWRAP );
537 }
538 
/*
 * ldif_get_entry - read the next ldif entry from the FILE referenced
 * by fp. return a pointer to a malloc'd, null-terminated buffer that
 * the caller must free. *lineno (if lineno is not NULL) is incremented
 * once for every physical line consumed, so it ends up holding the
 * last line number read.
 *
 * Blank lines in front of the entry and comment lines (starting with
 * '#') are skipped; the first blank line after the entry ends it.
 * Unless XP_WIN32 is defined, a <CR> in front of the newline is removed
 * so that the returned text always uses plain <LF> line endings.
 *
 * Physical lines longer than the internal BUFSIZ buffer are read in
 * several pieces; only the first piece of a line is tested for being
 * blank or a comment.
 *
 * NULL is returned at end of file when no entry was found, or when
 * memory runs out (whatever had been collected is freed).
 */
char *
ldif_get_entry( FILE *fp, int *lineno )
{
	char	line[BUFSIZ];
	char	*buf, *tmp;
	int	max, cur, len, gotsome;
	int	bol;		/* the next piece starts a physical line */
	int	fresh;		/* this piece starts a physical line */
	int	skipping;	/* inside a comment line */

	buf = NULL;
	max = cur = gotsome = 0;
	bol = 1;
	skipping = 0;
	while ( fgets( line, sizeof(line), fp ) != NULL ) {
		len = strlen( line );

		/*
		 * fgets() stops early only at a newline or at end of
		 * file, so a piece that fills the buffer without ending
		 * in a newline is continued by the next piece.
		 */
		fresh = bol;
		bol = ( len < (int) sizeof(line) - 1 ||
		    line[len-1] == '\n' );

		if ( fresh ) {
			if ( lineno != NULL ) {
				(*lineno)++;
			}
			/* ldif entries are terminated by a \n on a line by itself */
			if ( line[0] == '\0' || line[0] == '\n'
#if !defined( XP_WIN32 )
			     || ( line[0] == '\r' && line[1] == '\n' ) /* DOS format */
#endif
			   ) {
				if ( gotsome ) {
					break;
				} else {
					continue;
				}
			}
			skipping = ( line[0] == '#' );
		}
		if ( skipping ) {
			/* comment line, possibly spanning several pieces */
			continue;
		}

		gotsome = 1;
#if !defined( XP_WIN32 )
		/* DOS format */
		if ( len > 0 && line[len-1] == '\r' ) {
			--len;
			line[len] = '\0';
		} else if ( len > 1 && line[len-2] == '\r' && line[len-1] == '\n' ) {
			--len;
			line[len-1] = line[len];
			line[len] = '\0';
		}
#endif
		while ( cur + (len + 1) > max ) {
			if ( buf == NULL ) {
				max += BUFSIZ;
				tmp = (char *) malloc( max );
			} else {
				max *= 2;
				tmp = (char *) realloc( buf, max );
			}
			if ( tmp == NULL ) {
				/* realloc() leaves the old block allocated */
				free( buf );
				return( NULL );
			}
			buf = tmp;
		}

		/* copy the terminating zero too; the next piece overwrites it */
		memcpy( buf + cur, line, len + 1 );
		cur += len;
	}

	return( buf );
}
602 }
603