xref: /freebsd/contrib/file/src/vasprintf.c (revision f5ef5f675d9d9eb6e35ed9142d70ecf774456ad4)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*###########################################################################
29   #                                                                           #
30   #                                vasprintf                                  #
31   #                                                                           #
32   #               Copyright (c) 2002-2005 David TAILLANDIER                   #
33   #                                                                           #
34   ###########################################################################*/
35 
36 /*
37 
38 This software is distributed under the "modified BSD licence".
39 
40 This software is also released with GNU license (GPL) in another file (same
41 source-code, only license differ).
42 
43 
44 
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions are met:
47 
48 Redistributions of source code must retain the above copyright notice, this
49 list of conditions and the following disclaimer. Redistributions in binary
50 form must reproduce the above copyright notice, this list of conditions and
51 the following disclaimer in the documentation and/or other materials
52 provided with the distribution. The name of the author may not be used to
53 endorse or promote products derived from this software without specific
54 prior written permission.
55 
56 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
57 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
58 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
59 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
61 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
62 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
63 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
64 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
65 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66 
67 ====================
68 
69 Hacked from xnprintf version of 26th February 2005 to provide only
70 vasprintf by Reuben Thomas <rrt@sc3d.org>.
71 
72 ====================
73 
74 
The 'printf' function family uses the following format string:
76 
77 %[flag][width][.prec][modifier]type
78 
79 %% is the escape sequence to print a '%'
80 %  followed by an unknown format will print the characters without
81 trying to do any interpretation
82 
83 flag:   none   +     -     #     (blank)
84 width:  n    0n    *
85 prec:   none   .0    .n     .*
86 modifier:    F N L h l ll z t    ('F' and 'N' are ms-dos/16-bit specific)
87 type:  d i o u x X f e g E G c s p n
88 
89 
The function needs to allocate memory to hold the full text before
actually writing it, i.e. if you want to fnprintf() 1000 characters, the
function will allocate 1000 bytes.
This behaviour can be modified: you have to customise the code to flush the
internal buffer (writing to screen or file) when it reaches a given size. Then
the buffer can have a shorter length. But why bother? If you really need to
write a HUGE string, don't use printf!
During the process, some other memory is allocated (1024 bytes minimum)
to handle the output of partial sprintf() calls. If you have only 10000 bytes
free in memory, you *may* not be able to nprintf() an 8000-byte-long text.
100 
note: if a buffer overflow is detected, the assert() in print_it() fails
(unless assertions are compiled out). This situation should never
occur ... but if you want to be *really* sure, you have to modify the
code to handle those situations (only one place to modify).
A buffer overflow can only occur if your sprintf() does strange things or when
you use strange formats.
106 
107 */
108 #include "file.h"
109 
110 #ifndef	lint
111 FILE_RCSID("@(#)$File: vasprintf.c,v 1.14 2017/08/13 00:21:47 christos Exp $")
112 #endif	/* lint */
113 
114 #include <assert.h>
115 #include <string.h>
116 #include <stdlib.h>
117 #include <stdarg.h>
118 #include <ctype.h>
119 #ifdef HAVE_LIMITS_H
120 #include <limits.h>
121 #endif
122 #ifdef HAVE_STDDEF_H
123 #include <stddef.h>
124 #endif
125 
/*
 * Output buffer growth tuning.
 *
 * ALLOC_SECURITY_MARGIN  slack that must stay free beyond the estimated
 *                        length of a write; a big value because some
 *                        platforms have a very big 'G' exponent.
 * ALLOC_CHUNK            extra bytes requested on top of the needed size
 *                        every time the buffer is reallocated.
 *
 * ALLOC_CHUNK must not be smaller than ALLOC_SECURITY_MARGIN, and to be of
 * any use it should be much greater.
 */
#define ALLOC_SECURITY_MARGIN 1024
#define ALLOC_CHUNK 2048
#if ALLOC_SECURITY_MARGIN > ALLOC_CHUNK
#    error  !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
132 
/*
 *  Formatting state shared by every helper in this file.  Keeping it in a
 *  single structure lets the helpers take one pointer instead of a long
 *  list of arguments.
 */
typedef struct {
  /* current read position in the input (format) string */
  const char * src_string;
  /* start of the heap-allocated output buffer */
  char *       buffer_base;
  /* current write position inside the output buffer */
  char *       dest_string;
  /* allocated size of the output buffer, in bytes */
  size_t       buffer_len;
  /* number of characters really stored in the output buffer so far */
  size_t       real_len;
  /* number of characters the output would have if it were not limited */
  size_t       pseudo_len;
  /* output size limit; core() decrements it once to exclude the final NUL */
  size_t       maxlen;
  /* argument list, advanced as conversions consume their arguments */
  va_list      vargs;
  /* not referenced by the code in this file */
  char *       sprintf_string;
  /* not referenced by the code in this file */
  FILE *       fprintf_file;
} xprintf_struct;
149 
150 /*
151  *  Realloc buffer if needed
152  *  Return value:  0 = ok
153  *               EOF = not enought memory
154  */
155 static int realloc_buff(xprintf_struct *s, size_t len)
156 {
157   char * ptr;
158 
159   if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
160     len += s->real_len + ALLOC_CHUNK;
161     ptr = (char *)realloc((void *)(s->buffer_base), len);
162     if (ptr == NULL) {
163       s->buffer_base = NULL;
164       return EOF;
165     }
166 
167     s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
168     s->buffer_base = ptr;
169     s->buffer_len = len;
170 
171     (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
172   }
173 
174   return 0;
175 }
176 
177 /*
178  *  Prints 'usual' characters    up to next '%'
179  *                            or up to end of text
180  */
181 static int usual_char(xprintf_struct * s)
182 {
183   size_t len;
184 
185   len = strcspn(s->src_string, "%");     /* reachs the next '%' or end of input string */
186   /* note: 'len' is never 0 because the presence of '%' */
187   /* or end-of-line is checked in the calling function  */
188 
189   if (realloc_buff(s,len) == EOF)
190     return EOF;
191 
192   memcpy(s->dest_string, s->src_string, len);
193   s->src_string += len;
194   s->dest_string += len;
195   s->real_len += len;
196   s->pseudo_len += len;
197 
198   return 0;
199 }
200 
201 /*
202  *  Return value: 0 = ok
203  *                EOF = error
204  */
205 static int print_it(xprintf_struct *s, size_t approx_len,
206                     const char *format_string, ...)
207 {
208   va_list varg;
209   int vsprintf_len;
210   size_t len;
211 
212   if (realloc_buff(s,approx_len) == EOF)
213     return EOF;
214 
215   va_start(varg, format_string);
216   vsprintf_len = vsprintf(s->dest_string, format_string, varg);
217   va_end(varg);
218 
219   /* Check for overflow */
220   assert((s->buffer_base)[s->buffer_len - 1] == 1);
221 
222   if (vsprintf_len == EOF) /* must be done *after* overflow-check */
223     return EOF;
224 
225   s->pseudo_len += vsprintf_len;
226   len = strlen(s->dest_string);
227   s->real_len += len;
228   s->dest_string += len;
229 
230   return 0;
231 }
232 
233 /*
234  *  Prints a string (%s)
235  *  We need special handling because:
236  *     a: the length of the string is unknown
237  *     b: when .prec is used, we must not access any extra byte of the
238  *        string (of course, if the original sprintf() does... what the
239  *        hell, not my problem)
240  *
241  *  Return value: 0 = ok
242  *                EOF = error
243  */
244 static int type_s(xprintf_struct *s, int width, int prec,
245                   const char *format_string, const char *arg_string)
246 {
247   size_t string_len;
248 
249   if (arg_string == NULL)
250     return print_it(s, (size_t)6, "(null)", 0);
251 
252   /* hand-made strlen() whitch stops when 'prec' is reached. */
253   /* if 'prec' is -1 then it is never reached. */
254   string_len = 0;
255   while (arg_string[string_len] != 0 && (size_t)prec != string_len)
256     string_len++;
257 
258   if (width != -1 && string_len < (size_t)width)
259     string_len = (size_t)width;
260 
261   return print_it(s, string_len, format_string, arg_string);
262 }
263 
/*
 *  Reads a series of decimal digits and advances '*string' past all of
 *  them.  Stops at the first non-digit.
 *
 *  Return value: the value read, saturated to the range 0..32767.
 *
 *  The accumulation stops as soon as the value exceeds 32767, so an
 *  arbitrarily long digit string can neither overflow the accumulator nor
 *  wrap around to a small value; the remaining digits are still consumed.
 */
static int getint(const char **string)
{
  long value = 0;   /* 'long' so that 32767 * 10 + 9 fits even with 16-bit int */

  while (isdigit((unsigned char)**string) != 0) {
    if (value <= 32767)
      value = value * 10 + (**string - '0');
    (*string)++;
  }

  if (value > 32767)
    value = 32767;
  return (int)value;
}
285 
286 /*
287  *  Read a part of the format string. A part is 'usual characters' (ie "blabla")
288  *  or '%%' escape sequence (to print a single '%') or any combination of
289  *  format specifier (ie "%i" or "%10.2d").
290  *  After the current part is managed, the function returns to caller with
291  *  everything ready to manage the following part.
292  *  The caller must ensure than the string is not empty, i.e. the first byte
293  *  is not zero.
294  *
295  *  Return value:  0 = ok
296  *                 EOF = error
297  */
298 static int dispatch(xprintf_struct *s)
299 {
300   const char *initial_ptr;
301   char format_string[24]; /* max length may be something like  "% +-#032768.32768Ld" */
302   char *format_ptr;
303   int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
304   int width, prec, modifier, approx_width;
305   char type;
306   /* most of those variables are here to rewrite the format string */
307 
308 #define SRCTXT  (s->src_string)
309 #define DESTTXT (s->dest_string)
310 
311   /* incoherent format string. Characters after the '%' will be printed with the next call */
312 #define INCOHERENT()         do {SRCTXT=initial_ptr; return 0;} while (0)     /* do/while to avoid */
313 #define INCOHERENT_TEST()    do {if(*SRCTXT==0)   INCOHERENT();} while (0)    /* a null statement  */
314 
315   /* 'normal' text */
316   if (*SRCTXT != '%')
317     return usual_char(s);
318 
319   /* we then have a '%' */
320   SRCTXT++;
321   /* don't check for end-of-string ; this is done later */
322 
323   /* '%%' escape sequence */
324   if (*SRCTXT == '%') {
325     if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
326       return EOF;
327     *DESTTXT = '%';
328     DESTTXT++;
329     SRCTXT++;
330     (s->real_len)++;
331     (s->pseudo_len)++;
332     return 0;
333   }
334 
335   /* '%' managing */
336   initial_ptr = SRCTXT;   /* save current pointer in case of incorrect */
337   /* 'decoding'. Points just after the '%' so the '%' */
338   /* won't be printed in any case, as required. */
339 
340   /* flag */
341   flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
342 
343   for (;; SRCTXT++) {
344     if (*SRCTXT == ' ')
345       flag_space = 1;
346     else if (*SRCTXT == '+')
347       flag_plus = 1;
348     else if (*SRCTXT == '-')
349       flag_minus = 1;
350     else if (*SRCTXT == '#')
351       flag_sharp = 1;
352     else if (*SRCTXT == '0')
353       flag_zero = 1;
354     else
355       break;
356   }
357 
358   INCOHERENT_TEST();    /* here is the first test for end of string */
359 
360   /* width */
361   if (*SRCTXT == '*') {         /* width given by next argument */
362     SRCTXT++;
363     width = va_arg(s->vargs, int);
364     if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
365       width = 0x3fff;
366   } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
367     width = getint(&SRCTXT);
368   else
369     width = -1;                 /* no width specified */
370 
371   INCOHERENT_TEST();
372 
373   /* .prec */
374   if (*SRCTXT == '.') {
375     SRCTXT++;
376     if (*SRCTXT == '*') {       /* .prec given by next argument */
377       SRCTXT++;
378       prec = va_arg(s->vargs, int);
379       if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
380         prec = 0x3fff;
381     } else {                    /* .prec given as ASCII number */
382       if (isdigit((unsigned char)*SRCTXT) == 0)
383         INCOHERENT();
384       prec = getint(&SRCTXT);
385     }
386     INCOHERENT_TEST();
387   } else
388     prec = -1;                  /* no .prec specified */
389 
390   /* modifier */
391   switch (*SRCTXT) {
392   case 'L':
393   case 'h':
394   case 'l':
395   case 'z':
396   case 't':
397     modifier = *SRCTXT;
398     SRCTXT++;
399     if (modifier=='l' && *SRCTXT=='l') {
400       SRCTXT++;
401       modifier = 'L';  /* 'll' == 'L'      long long == long double */
402     } /* only for compatibility ; not portable */
403     INCOHERENT_TEST();
404     break;
405   default:
406     modifier = -1;              /* no modifier specified */
407     break;
408   }
409 
410   /* type */
411   type = *SRCTXT;
412   if (strchr("diouxXfegEGcspn",type) == NULL)
413     INCOHERENT();               /* unknown type */
414   SRCTXT++;
415 
416   /* rewrite format-string */
417   format_string[0] = '%';
418   format_ptr = &(format_string[1]);
419 
420   if (flag_plus) {
421     *format_ptr = '+';
422     format_ptr++;
423   }
424   if (flag_minus) {
425     *format_ptr = '-';
426     format_ptr++;
427   }
428   if (flag_space) {
429     *format_ptr = ' ';
430     format_ptr++;
431   }
432   if (flag_sharp) {
433     *format_ptr = '#';
434     format_ptr++;
435   }
436   if (flag_zero) {
437     *format_ptr = '0';
438     format_ptr++;
439   } /* '0' *must* be the last one */
440 
441   if (width != -1) {
442     sprintf(format_ptr, "%i", width);
443     format_ptr += strlen(format_ptr);
444   }
445 
446   if (prec != -1) {
447     *format_ptr = '.';
448     format_ptr++;
449     sprintf(format_ptr, "%i", prec);
450     format_ptr += strlen(format_ptr);
451   }
452 
453   if (modifier != -1) {
454     if (modifier == 'L' && strchr("diouxX",type) != NULL) {
455       *format_ptr = 'l';
456       format_ptr++;
457       *format_ptr = 'l';
458       format_ptr++;
459     } else {
460       *format_ptr = modifier;
461       format_ptr++;
462     }
463   }
464 
465   *format_ptr = type;
466   format_ptr++;
467   *format_ptr = 0;
468 
469   /* vague approximation of minimal length if width or prec are specified */
470   approx_width = width + prec;
471   if (approx_width < 0) /* because width == -1 and/or prec == -1 */
472     approx_width = 0;
473 
474   switch (type) {
475     /* int */
476   case 'd':
477   case 'i':
478   case 'o':
479   case 'u':
480   case 'x':
481   case 'X':
482     switch (modifier) {
483     case -1 :
484       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
485     case 'L':
486       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
487     case 'l':
488       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
489     case 'h':
490       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
491     case 'z':
492       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
493     case 't':
494       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
495       /* 'int' instead of 'short int' because default promotion is 'int' */
496     default:
497       INCOHERENT();
498     }
499 
500     /* char */
501   case 'c':
502     if (modifier != -1)
503       INCOHERENT();
504     return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
505     /* 'int' instead of 'char' because default promotion is 'int' */
506 
507     /* math */
508   case 'e':
509   case 'f':
510   case 'g':
511   case 'E':
512   case 'G':
513     switch (modifier) {
514     case -1 : /* because of default promotion, no modifier means 'l' */
515     case 'l':
516       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
517     case 'L':
518       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
519     default:
520       INCOHERENT();
521     }
522 
523     /* string */
524   case 's':
525     return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
526 
527     /* pointer */
528   case 'p':
529     if (modifier == -1)
530       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
531     INCOHERENT();
532 
533     /* store */
534   case 'n':
535     if (modifier == -1) {
536       int * p;
537       p = va_arg(s->vargs, int *);
538       if (p != NULL) {
539         *p = s->pseudo_len;
540         return 0;
541       }
542       return EOF;
543     }
544     INCOHERENT();
545 
546   } /* switch */
547 
548   INCOHERENT();                 /* unknown type */
549 
550 #undef INCOHERENT
551 #undef INCOHERENT_TEST
552 #undef SRCTXT
553 #undef DESTTXT
554 }
555 
556 /*
557  *  Return value: number of *virtually* written characters
558  *                EOF = error
559  */
560 static int core(xprintf_struct *s)
561 {
562   size_t save_len;
563   char *dummy_base;
564 
565   /* basic checks */
566   if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
567     return EOF;           /* error for example if value is (int)-10 */
568   s->maxlen--;      /* because initial maxlen counts final 0 */
569   /* note: now 'maxlen' _can_ be zero */
570 
571   if (s->src_string == NULL)
572     s->src_string = "(null)";
573 
574   /* struct init and memory allocation */
575   s->buffer_base = NULL;
576   s->buffer_len = 0;
577   s->real_len = 0;
578   s->pseudo_len = 0;
579   if (realloc_buff(s, (size_t)0) == EOF)
580     return EOF;
581   s->dest_string = s->buffer_base;
582 
583   /* process source string */
584   for (;;) {
585     /* up to end of source string */
586     if (*(s->src_string) == 0) {
587       *(s->dest_string) = '\0';    /* final NUL */
588       break;
589     }
590 
591     if (dispatch(s) == EOF)
592       goto free_EOF;
593 
594     /* up to end of dest string */
595     if (s->real_len >= s->maxlen) {
596       (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
597       break;
598     }
599   }
600 
601   /* for (v)asnprintf */
602   dummy_base = s->buffer_base;
603 
604   dummy_base = s->buffer_base + s->real_len;
605   save_len = s->real_len;
606 
607   /* process the remaining of source string to compute 'pseudo_len'. We
608    * overwrite again and again, starting at 'dummy_base' because we don't
609    * need the text, only char count. */
610   while(*(s->src_string) != 0) { /* up to end of source string */
611     s->real_len = 0;
612     s->dest_string = dummy_base;
613     if (dispatch(s) == EOF)
614       goto free_EOF;
615   }
616 
617   s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
618   if (s->buffer_base == NULL)
619     return EOF; /* should rarely happen because we shrink the buffer */
620   return s->pseudo_len;
621 
622  free_EOF:
623   free(s->buffer_base);
624   return EOF;
625 }
626 
627 int vasprintf(char **ptr, const char *format_string, va_list vargs)
628 {
629   xprintf_struct s;
630   int retval;
631 
632   s.src_string = format_string;
633 #ifdef va_copy
634   va_copy (s.vargs, vargs);
635 #else
636 # ifdef __va_copy
637   __va_copy (s.vargs, vargs);
638 # else
639 #  ifdef WIN32
640   s.vargs = vargs;
641 #  else
642   memcpy (&s.vargs, &vargs, sizeof (s.va_args));
643 #  endif /* WIN32 */
644 # endif /* __va_copy */
645 #endif /* va_copy */
646   s.maxlen = (size_t)INT_MAX;
647 
648   retval = core(&s);
649   va_end(s.vargs);
650   if (retval == EOF) {
651     *ptr = NULL;
652     return EOF;
653   }
654 
655   *ptr = s.buffer_base;
656   return retval;
657 }
658