xref: /freebsd/contrib/file/src/vasprintf.c (revision d4eeb02986980bf33dd56c41ceb9fc5f180c0d47)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*###########################################################################
29   #                                                                           #
30   #                                vasprintf                                  #
31   #                                                                           #
32   #               Copyright (c) 2002-2005 David TAILLANDIER                   #
33   #                                                                           #
34   ###########################################################################*/
35 
36 /*
37 
38 This software is distributed under the "modified BSD licence".
39 
40 This software is also released with GNU license (GPL) in another file (same
41 source-code, only license differ).
42 
43 
44 
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions are met:
47 
48 Redistributions of source code must retain the above copyright notice, this
49 list of conditions and the following disclaimer. Redistributions in binary
50 form must reproduce the above copyright notice, this list of conditions and
51 the following disclaimer in the documentation and/or other materials
52 provided with the distribution. The name of the author may not be used to
53 endorse or promote products derived from this software without specific
54 prior written permission.
55 
56 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
57 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
58 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
59 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
61 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
62 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
63 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
64 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
65 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66 
67 ====================
68 
69 Hacked from xnprintf version of 26th February 2005 to provide only
70 vasprintf by Reuben Thomas <rrt@sc3d.org>.
71 
72 ====================
73 
74 
75 'printf' function family use the following format string:
76 
77 %[flag][width][.prec][modifier]type
78 
79 %% is the escape sequence to print a '%'
80 %  followed by an unknown format will print the characters without
81 trying to do any interpretation
82 
83 flag:   none   +     -     #     (blank)
84 width:  n    0n    *
85 prec:   none   .0    .n     .*
86 modifier:    F N L h l ll z t    ('F' and 'N' are ms-dos/16-bit specific)
87 type:  d i o u x X f e g E G c s p n
88 
89 
The function needs to allocate memory to hold the full text before
actually writing it, i.e. if you want to fnprintf() 1000 characters, the
function will allocate 1000 bytes.
This behaviour can be modified: you have to customise the code to flush the
internal buffer (writing to screen or file) when it reaches a given size. Then
the buffer can be shorter. But why bother? If you really need to write a
HUGE string, don't use printf!
During the process, some additional memory is allocated (1024 bytes minimum)
to handle the output of partial sprintf() calls. If you have only 10000 bytes
of free memory, you *may* not be able to nprintf() an 8000-byte text.

Note: if a buffer overflow is detected, the program is terminated (in this
version the check is an assert() on an overflow-marker byte rather than a
call to exit()). This situation should never occur ... but if you want to be
*really* sure, you have to modify the code to handle it (there is only one
place to modify).
A buffer overflow can only occur if your sprintf() does strange things or when
you use strange formats.
106 
107 */
108 #include "file.h"
109 
110 #ifndef	lint
111 FILE_RCSID("@(#)$File: vasprintf.c,v 1.19 2021/02/23 00:51:11 christos Exp $")
112 #endif	/* lint */
113 
114 #include <assert.h>
115 #include <string.h>
116 #include <stdlib.h>
117 #include <stdarg.h>
118 #include <ctype.h>
119 #include <limits.h>
120 #include <stddef.h>
121 
/*
 * Output buffer sizing, used by realloc_buff():
 *
 *   ALLOC_SECURITY_MARGIN  slack that must stay free beyond the requested
 *                          length; when it would not, the buffer is grown.
 *                          Big value because some platforms have a very big
 *                          'G' exponent.
 *   ALLOC_CHUNK            extra room added every time the buffer is grown.
 *
 * ALLOC_CHUNK must not be smaller than ALLOC_SECURITY_MARGIN, and to be of
 * any interest it should be much greater.
 */
#define ALLOC_SECURITY_MARGIN 1024
#define ALLOC_CHUNK 2048
#if ALLOC_CHUNK < ALLOC_SECURITY_MARGIN
#    error  !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
128 
/*
 *  Formatting state shared by the helpers below.  Everything lives in one
 *  structure so that a single pointer can be handed to nearly every
 *  sub-function instead of a long argument list.
 */
typedef struct {
  /* input side */
  const char * src_string;        /* current position in the format string */

  /* output side */
  char *       buffer_base;       /* start of the output buffer */
  char *       dest_string;       /* current write position in the output buffer */
  size_t       buffer_len;        /* allocated size of the output buffer */
  size_t       real_len;          /* length of the text actually stored so far */
  size_t       pseudo_len;        /* length the text would have without any size limit */
  size_t       maxlen;            /* output size limit; counts the final NUL until core() adjusts it */

  va_list      vargs;             /* current position in the caller's argument list */

  char *       sprintf_string;    /* not referenced by the code in this file */
  FILE *       fprintf_file;      /* not referenced by the code in this file */
} xprintf_struct;
145 
146 /*
147  *  Realloc buffer if needed
148  *  Return value:  0 = ok
149  *               EOF = not enough memory
150  */
151 static int realloc_buff(xprintf_struct *s, size_t len)
152 {
153   char * ptr;
154 
155   if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
156     len += s->real_len + ALLOC_CHUNK;
157     ptr = (char *)realloc((void *)(s->buffer_base), len);
158     if (ptr == NULL) {
159       s->buffer_base = NULL;
160       return EOF;
161     }
162 
163     s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
164     s->buffer_base = ptr;
165     s->buffer_len = len;
166 
167     (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
168   }
169 
170   return 0;
171 }
172 
173 /*
174  *  Prints 'usual' characters    up to next '%'
175  *                            or up to end of text
176  */
177 static int usual_char(xprintf_struct * s)
178 {
179   size_t len;
180 
181   len = strcspn(s->src_string, "%");     /* reaches the next '%' or end of input string */
182   /* note: 'len' is never 0 because the presence of '%' */
183   /* or end-of-line is checked in the calling function  */
184 
185   if (realloc_buff(s,len) == EOF)
186     return EOF;
187 
188   memcpy(s->dest_string, s->src_string, len);
189   s->src_string += len;
190   s->dest_string += len;
191   s->real_len += len;
192   s->pseudo_len += len;
193 
194   return 0;
195 }
196 
197 /*
198  *  Return value: 0 = ok
199  *                EOF = error
200  */
201 static int print_it(xprintf_struct *s, size_t approx_len,
202                     const char *format_string, ...)
203 {
204   va_list varg;
205   int vsprintf_len;
206   size_t len;
207 
208   if (realloc_buff(s,approx_len) == EOF)
209     return EOF;
210 
211   va_start(varg, format_string);
212   vsprintf_len = vsprintf(s->dest_string, format_string, varg);
213   va_end(varg);
214 
215   /* Check for overflow */
216   assert((s->buffer_base)[s->buffer_len - 1] == 1);
217 
218   if (vsprintf_len == EOF) /* must be done *after* overflow-check */
219     return EOF;
220 
221   s->pseudo_len += vsprintf_len;
222   len = strlen(s->dest_string);
223   s->real_len += len;
224   s->dest_string += len;
225 
226   return 0;
227 }
228 
229 /*
230  *  Prints a string (%s)
231  *  We need special handling because:
232  *     a: the length of the string is unknown
233  *     b: when .prec is used, we must not access any extra byte of the
234  *        string (of course, if the original sprintf() does... what the
235  *        hell, not my problem)
236  *
237  *  Return value: 0 = ok
238  *                EOF = error
239  */
240 static int type_s(xprintf_struct *s, int width, int prec,
241                   const char *format_string, const char *arg_string)
242 {
243   size_t string_len;
244 
245   if (arg_string == NULL)
246     return print_it(s, (size_t)6, "(null)", 0);
247 
248   /* hand-made strlen() which stops when 'prec' is reached. */
249   /* if 'prec' is -1 then it is never reached. */
250   string_len = 0;
251   while (arg_string[string_len] != 0 && (size_t)prec != string_len)
252     string_len++;
253 
254   if (width != -1 && string_len < (size_t)width)
255     string_len = (size_t)width;
256 
257   return print_it(s, string_len, format_string, arg_string);
258 }
259 
/*
 *  Read a series of decimal digits. Stop when a non-digit is found.
 *  On return '*string' points to the first character that is not a digit.
 *
 *  Return value: the value read, between 0 and 32767. Numbers bigger than
 *  32767 are reported as 32767; the accumulation saturates instead of
 *  overflowing, so arbitrarily long digit strings are safe. All the digits
 *  are consumed in any case.
 */
static int getint(const char **string)
{
  int value = 0;
  int digit;

  while (isdigit((unsigned char)**string) != 0) {
    digit = **string - '0';
    if (value > (32767 - digit) / 10)
      value = 32767;              /* would exceed the cap: saturate */
    else
      value = value * 10 + digit;
    (*string)++;
  }

  return value;
}
281 
282 /*
283  *  Read a part of the format string. A part is 'usual characters' (ie "blabla")
284  *  or '%%' escape sequence (to print a single '%') or any combination of
285  *  format specifier (ie "%i" or "%10.2d").
286  *  After the current part is managed, the function returns to caller with
287  *  everything ready to manage the following part.
288  *  The caller must ensure than the string is not empty, i.e. the first byte
289  *  is not zero.
290  *
291  *  Return value:  0 = ok
292  *                 EOF = error
293  */
294 static int dispatch(xprintf_struct *s)
295 {
296   const char *initial_ptr;
297   char format_string[24]; /* max length may be something like  "% +-#032768.32768Ld" */
298   char *format_ptr;
299   int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
300   int width, prec, modifier, approx_width;
301   char type;
302   /* most of those variables are here to rewrite the format string */
303 
304 #define SRCTXT  (s->src_string)
305 #define DESTTXT (s->dest_string)
306 
307   /* incoherent format string. Characters after the '%' will be printed with the next call */
308 #define INCOHERENT()         do {SRCTXT=initial_ptr; return 0;} while (0)     /* do/while to avoid */
309 #define INCOHERENT_TEST()    do {if(*SRCTXT==0)   INCOHERENT();} while (0)    /* a null statement  */
310 
311   /* 'normal' text */
312   if (*SRCTXT != '%')
313     return usual_char(s);
314 
315   /* we then have a '%' */
316   SRCTXT++;
317   /* don't check for end-of-string ; this is done later */
318 
319   /* '%%' escape sequence */
320   if (*SRCTXT == '%') {
321     if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
322       return EOF;
323     *DESTTXT = '%';
324     DESTTXT++;
325     SRCTXT++;
326     (s->real_len)++;
327     (s->pseudo_len)++;
328     return 0;
329   }
330 
331   /* '%' managing */
332   initial_ptr = SRCTXT;   /* save current pointer in case of incorrect */
333   /* 'decoding'. Points just after the '%' so the '%' */
334   /* won't be printed in any case, as required. */
335 
336   /* flag */
337   flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
338 
339   for (;; SRCTXT++) {
340     if (*SRCTXT == ' ')
341       flag_space = 1;
342     else if (*SRCTXT == '+')
343       flag_plus = 1;
344     else if (*SRCTXT == '-')
345       flag_minus = 1;
346     else if (*SRCTXT == '#')
347       flag_sharp = 1;
348     else if (*SRCTXT == '0')
349       flag_zero = 1;
350     else
351       break;
352   }
353 
354   INCOHERENT_TEST();    /* here is the first test for end of string */
355 
356   /* width */
357   if (*SRCTXT == '*') {         /* width given by next argument */
358     SRCTXT++;
359     width = va_arg(s->vargs, int);
360     if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
361       width = 0x3fff;
362   } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
363     width = getint(&SRCTXT);
364   else
365     width = -1;                 /* no width specified */
366 
367   INCOHERENT_TEST();
368 
369   /* .prec */
370   if (*SRCTXT == '.') {
371     SRCTXT++;
372     if (*SRCTXT == '*') {       /* .prec given by next argument */
373       SRCTXT++;
374       prec = va_arg(s->vargs, int);
375       if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
376         prec = 0x3fff;
377     } else {                    /* .prec given as ASCII number */
378       if (isdigit((unsigned char)*SRCTXT) == 0)
379         INCOHERENT();
380       prec = getint(&SRCTXT);
381     }
382     INCOHERENT_TEST();
383   } else
384     prec = -1;                  /* no .prec specified */
385 
386   /* modifier */
387   switch (*SRCTXT) {
388   case 'L':
389   case 'h':
390   case 'l':
391   case 'z':
392   case 't':
393     modifier = *SRCTXT;
394     SRCTXT++;
395     if (modifier=='l' && *SRCTXT=='l') {
396       SRCTXT++;
397       modifier = 'L';  /* 'll' == 'L'      long long == long double */
398     } /* only for compatibility ; not portable */
399     INCOHERENT_TEST();
400     break;
401   default:
402     modifier = -1;              /* no modifier specified */
403     break;
404   }
405 
406   /* type */
407   type = *SRCTXT;
408   if (strchr("diouxXfegEGcspn",type) == NULL)
409     INCOHERENT();               /* unknown type */
410   SRCTXT++;
411 
412   /* rewrite format-string */
413   format_string[0] = '%';
414   format_ptr = &(format_string[1]);
415 
416   if (flag_plus) {
417     *format_ptr = '+';
418     format_ptr++;
419   }
420   if (flag_minus) {
421     *format_ptr = '-';
422     format_ptr++;
423   }
424   if (flag_space) {
425     *format_ptr = ' ';
426     format_ptr++;
427   }
428   if (flag_sharp) {
429     *format_ptr = '#';
430     format_ptr++;
431   }
432   if (flag_zero) {
433     *format_ptr = '0';
434     format_ptr++;
435   } /* '0' *must* be the last one */
436 
437   if (width != -1) {
438     sprintf(format_ptr, "%i", width);
439     format_ptr += strlen(format_ptr);
440   }
441 
442   if (prec != -1) {
443     *format_ptr = '.';
444     format_ptr++;
445     sprintf(format_ptr, "%i", prec);
446     format_ptr += strlen(format_ptr);
447   }
448 
449   if (modifier != -1) {
450     if (modifier == 'L' && strchr("diouxX",type) != NULL) {
451       *format_ptr = 'l';
452       format_ptr++;
453       *format_ptr = 'l';
454       format_ptr++;
455     } else {
456       *format_ptr = modifier;
457       format_ptr++;
458     }
459   }
460 
461   *format_ptr = type;
462   format_ptr++;
463   *format_ptr = 0;
464 
465   /* vague approximation of minimal length if width or prec are specified */
466   approx_width = width + prec;
467   if (approx_width < 0) /* because width == -1 and/or prec == -1 */
468     approx_width = 0;
469 
470   switch (type) {
471     /* int */
472   case 'd':
473   case 'i':
474   case 'o':
475   case 'u':
476   case 'x':
477   case 'X':
478     switch (modifier) {
479     case -1 :
480       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
481     case 'L':
482       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
483     case 'l':
484       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
485     case 'h':
486       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
487     case 'z':
488       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
489     case 't':
490       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
491       /* 'int' instead of 'short int' because default promotion is 'int' */
492     default:
493       INCOHERENT();
494     }
495 
496     /* char */
497   case 'c':
498     if (modifier != -1)
499       INCOHERENT();
500     return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
501     /* 'int' instead of 'char' because default promotion is 'int' */
502 
503     /* math */
504   case 'e':
505   case 'f':
506   case 'g':
507   case 'E':
508   case 'G':
509     switch (modifier) {
510     case -1 : /* because of default promotion, no modifier means 'l' */
511     case 'l':
512       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
513     case 'L':
514       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
515     default:
516       INCOHERENT();
517     }
518 
519     /* string */
520   case 's':
521     return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
522 
523     /* pointer */
524   case 'p':
525     if (modifier == -1)
526       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
527     INCOHERENT();
528 
529     /* store */
530   case 'n':
531     if (modifier == -1) {
532       int * p;
533       p = va_arg(s->vargs, int *);
534       if (p != NULL) {
535         *p = s->pseudo_len;
536         return 0;
537       }
538       return EOF;
539     }
540     INCOHERENT();
541 
542   } /* switch */
543 
544   INCOHERENT();                 /* unknown type */
545 
546 #undef INCOHERENT
547 #undef INCOHERENT_TEST
548 #undef SRCTXT
549 #undef DESTTXT
550 }
551 
552 /*
553  *  Return value: number of *virtually* written characters
554  *                EOF = error
555  */
556 static int core(xprintf_struct *s)
557 {
558   size_t save_len;
559   char *dummy_base;
560 
561   /* basic checks */
562   if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
563     return EOF;           /* error for example if value is (int)-10 */
564   s->maxlen--;      /* because initial maxlen counts final 0 */
565   /* note: now 'maxlen' _can_ be zero */
566 
567   if (s->src_string == NULL)
568     s->src_string = "(null)";
569 
570   /* struct init and memory allocation */
571   s->buffer_base = NULL;
572   s->buffer_len = 0;
573   s->real_len = 0;
574   s->pseudo_len = 0;
575   if (realloc_buff(s, (size_t)0) == EOF)
576     return EOF;
577   s->dest_string = s->buffer_base;
578 
579   /* process source string */
580   for (;;) {
581     /* up to end of source string */
582     if (*(s->src_string) == 0) {
583       *(s->dest_string) = '\0';    /* final NUL */
584       break;
585     }
586 
587     if (dispatch(s) == EOF)
588       goto free_EOF;
589 
590     /* up to end of dest string */
591     if (s->real_len >= s->maxlen) {
592       (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
593       break;
594     }
595   }
596 
597   /* for (v)asnprintf */
598   dummy_base = s->buffer_base;
599 
600   dummy_base = s->buffer_base + s->real_len;
601   save_len = s->real_len;
602 
603   /* process the remaining of source string to compute 'pseudo_len'. We
604    * overwrite again and again, starting at 'dummy_base' because we don't
605    * need the text, only char count. */
606   while(*(s->src_string) != 0) { /* up to end of source string */
607     s->real_len = 0;
608     s->dest_string = dummy_base;
609     if (dispatch(s) == EOF)
610       goto free_EOF;
611   }
612 
613   s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
614   if (s->buffer_base == NULL)
615     return EOF; /* should rarely happen because we shrink the buffer */
616   return s->pseudo_len;
617 
618  free_EOF:
619   free(s->buffer_base);
620   return EOF;
621 }
622 
623 int vasprintf(char **ptr, const char *format_string, va_list vargs)
624 {
625   xprintf_struct s;
626   int retval;
627 
628   s.src_string = format_string;
629 #ifdef va_copy
630   va_copy (s.vargs, vargs);
631 #else
632 # ifdef __va_copy
633   __va_copy (s.vargs, vargs);
634 # else
635 #  ifdef WIN32
636   s.vargs = vargs;
637 #  else
638   memcpy (&s.vargs, &vargs, sizeof (s.va_args));
639 #  endif /* WIN32 */
640 # endif /* __va_copy */
641 #endif /* va_copy */
642   s.maxlen = (size_t)INT_MAX;
643 
644   retval = core(&s);
645   va_end(s.vargs);
646   if (retval == EOF) {
647     *ptr = NULL;
648     return EOF;
649   }
650 
651   *ptr = s.buffer_base;
652   return retval;
653 }
654