1 /*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28 /*###########################################################################
29 # #
30 # vasprintf #
31 # #
32 # Copyright (c) 2002-2005 David TAILLANDIER #
33 # #
34 ###########################################################################*/
35
36 /*
37
38 This software is distributed under the "modified BSD licence".
39
40 This software is also released with GNU license (GPL) in another file (same
41 source-code, only license differ).
42
43
44
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions are met:
47
48 Redistributions of source code must retain the above copyright notice, this
49 list of conditions and the following disclaimer. Redistributions in binary
50 form must reproduce the above copyright notice, this list of conditions and
51 the following disclaimer in the documentation and/or other materials
52 provided with the distribution. The name of the author may not be used to
53 endorse or promote products derived from this software without specific
54 prior written permission.
55
56 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
57 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
58 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
59 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
61 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
62 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
63 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
64 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
65 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67 ====================
68
69 Hacked from xnprintf version of 26th February 2005 to provide only
70 vasprintf by Reuben Thomas <rrt@sc3d.org>.
71
72 ====================
73
74
75 'printf' function family use the following format string:
76
77 %[flag][width][.prec][modifier]type
78
79 %% is the escape sequence to print a '%'
80 % followed by an unknown format will print the characters without
81 trying to do any interpretation
82
83 flag: none + - # (blank)
84 width: n 0n *
85 prec: none .0 .n .*
86 modifier: F N L h l ll z t ('F' and 'N' are ms-dos/16-bit specific)
87 type: d i o u x X f e g E G c s p n
88
89
The function needs to allocate memory to hold the full text before
actually writing it, i.e. if you want to fnprintf() 1000 characters, the
function will allocate at least 1000 bytes.
93 This behaviour can be modified: you have to customise the code to flush the
94 internal buffer (writing to screen or file) when it reach a given size. Then
95 the buffer can have a shorter length. But what? If you really need to write
96 HUGE string, don't use printf!
97 During the process, some other memory is allocated (1024 bytes minimum)
98 to handle the output of partial sprintf() calls. If you have only 10000 bytes
99 free in memory, you *may* not be able to nprintf() an 8000 bytes-long text.
100
note: a buffer overflow is detected by an assert() on an overflow marker,
which aborts the program (unless assertions are compiled out with NDEBUG).
This situation should never occur ... but if you want to be *really* sure,
you have to modify the code to handle it (only one place to modify).
104 A buffer overflow can only occur if your sprintf() do strange things or when
105 you use strange formats.
106
107 */
108 #include "file.h"
109
110 #ifndef lint
111 FILE_RCSID("@(#)$File: vasprintf.c,v 1.23 2022/09/24 20:30:13 christos Exp $")
112 #endif /* lint */
113
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
121
122 #define ALLOC_CHUNK 2048
123 #define ALLOC_SECURITY_MARGIN 1024 /* big value because some platforms have very big 'G' exponent */
124 #if ALLOC_CHUNK < ALLOC_SECURITY_MARGIN
125 # error !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
126 #endif
127 /* note: to have some interest, ALLOC_CHUNK should be much greater than ALLOC_SECURITY_MARGIN */
128
/*
 * Conversion state.  Rather than pushing many separate arguments around,
 * all the working variables are kept in this structure, which is handed
 * to nearly every helper function.
 */
typedef struct {
	/* current position in the input (format) string */
	const char *src_string;
	/* start of the output buffer */
	char *buffer_base;
	/* current position in the output string */
	char *dest_string;
	/* allocated length of the output buffer */
	size_t buffer_len;
	/* real current length of the output text */
	size_t real_len;
	/* total length the output text would have if it were not limited in size */
	size_t pseudo_len;
	/* output size limit */
	size_t maxlen;
	/* current position in the variadic arguments */
	va_list vargs;
} xprintf_struct;
143
144 /*
145 * Realloc buffer if needed
146 * Return value: 0 = ok
147 * EOF = not enough memory
148 */
realloc_buff(xprintf_struct * s,size_t len)149 static int realloc_buff(xprintf_struct *s, size_t len)
150 {
151 char * ptr;
152
153 if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
154 len += s->real_len + ALLOC_CHUNK;
155 ptr = (char *)realloc((void *)(s->buffer_base), len);
156 if (ptr == NULL) {
157 s->buffer_base = NULL;
158 return EOF;
159 }
160
161 s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
162 s->buffer_base = ptr;
163 s->buffer_len = len;
164
165 (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
166 }
167
168 return 0;
169 }
170
171 /*
172 * Prints 'usual' characters up to next '%'
173 * or up to end of text
174 */
usual_char(xprintf_struct * s)175 static int usual_char(xprintf_struct * s)
176 {
177 size_t len;
178
179 len = strcspn(s->src_string, "%"); /* reaches the next '%' or end of input string */
180 /* note: 'len' is never 0 because the presence of '%' */
181 /* or end-of-line is checked in the calling function */
182
183 if (realloc_buff(s,len) == EOF)
184 return EOF;
185
186 memcpy(s->dest_string, s->src_string, len);
187 s->src_string += len;
188 s->dest_string += len;
189 s->real_len += len;
190 s->pseudo_len += len;
191
192 return 0;
193 }
194
195 /*
196 * Return value: 0 = ok
197 * EOF = error
198 */
print_it(xprintf_struct * s,size_t approx_len,const char * format_string,...)199 static int print_it(xprintf_struct *s, size_t approx_len,
200 const char *format_string, ...)
201 {
202 va_list varg;
203 int vsprintf_len;
204 size_t len;
205
206 if (realloc_buff(s,approx_len) == EOF)
207 return EOF;
208
209 va_start(varg, format_string);
210 vsprintf_len = vsprintf(s->dest_string, format_string, varg);
211 va_end(varg);
212
213 /* Check for overflow */
214 assert((s->buffer_base)[s->buffer_len - 1] == 1);
215
216 if (vsprintf_len == EOF) /* must be done *after* overflow-check */
217 return EOF;
218
219 s->pseudo_len += vsprintf_len;
220 len = strlen(s->dest_string);
221 s->real_len += len;
222 s->dest_string += len;
223
224 return 0;
225 }
226
227 /*
228 * Prints a string (%s)
229 * We need special handling because:
230 * a: the length of the string is unknown
231 * b: when .prec is used, we must not access any extra byte of the
232 * string (of course, if the original sprintf() does... what the
233 * hell, not my problem)
234 *
235 * Return value: 0 = ok
236 * EOF = error
237 */
type_s(xprintf_struct * s,int width,int prec,const char * format_string,const char * arg_string)238 static int type_s(xprintf_struct *s, int width, int prec,
239 const char *format_string, const char *arg_string)
240 {
241 size_t string_len;
242
243 if (arg_string == NULL)
244 return print_it(s, (size_t)6, "(null)", 0);
245
246 /* hand-made strlen() which stops when 'prec' is reached. */
247 /* if 'prec' is -1 then it is never reached. */
248 string_len = 0;
249 while (arg_string[string_len] != 0 && (size_t)prec != string_len)
250 string_len++;
251
252 if (width != -1 && string_len < (size_t)width)
253 string_len = (size_t)width;
254
255 return print_it(s, string_len, format_string, arg_string);
256 }
257
/*
 * Read a series of decimal digits.  Stop when a non-digit is found and
 * leave *string pointing at that character.
 *
 * Return value: the value read, between 0 and 32767.  Numbers larger than
 * 32767 are reported as 32767; all their digits are still consumed.  The
 * accumulator stops growing once it exceeds the cap, so arbitrarily long
 * digit strings cannot overflow the 'int'.
 */
static int getint(const char **string)
{
	int value = 0;

	while (isdigit((unsigned char)**string) != 0) {
		/* value <= 32767 here, so value * 10 + 9 always fits an int */
		if (value <= 32767)
			value = value * 10 + (**string - '0');
		(*string)++;
	}

	return (value > 32767) ? 32767 : value;
}
279
280 /*
281 * Read a part of the format string. A part is 'usual characters' (ie "blabla")
282 * or '%%' escape sequence (to print a single '%') or any combination of
283 * format specifier (ie "%i" or "%10.2d").
284 * After the current part is managed, the function returns to caller with
285 * everything ready to manage the following part.
286 * The caller must ensure than the string is not empty, i.e. the first byte
287 * is not zero.
288 *
289 * Return value: 0 = ok
290 * EOF = error
291 */
dispatch(xprintf_struct * s)292 static int dispatch(xprintf_struct *s)
293 {
294 const char *initial_ptr;
295 char format_string[24]; /* max length may be something like "% +-#032768.32768Ld" */
296 char *format_ptr;
297 int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
298 int width, prec, modifier, approx_width;
299 char type;
300 /* most of those variables are here to rewrite the format string */
301
302 #define SRCTXT (s->src_string)
303 #define DESTTXT (s->dest_string)
304
305 /* incoherent format string. Characters after the '%' will be printed with the next call */
306 #define INCOHERENT() do {SRCTXT=initial_ptr; return 0;} while (0) /* do/while to avoid */
307 #define INCOHERENT_TEST() do {if(*SRCTXT==0) INCOHERENT();} while (0) /* a null statement */
308
309 /* 'normal' text */
310 if (*SRCTXT != '%')
311 return usual_char(s);
312
313 /* we then have a '%' */
314 SRCTXT++;
315 /* don't check for end-of-string ; this is done later */
316
317 /* '%%' escape sequence */
318 if (*SRCTXT == '%') {
319 if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
320 return EOF;
321 *DESTTXT = '%';
322 DESTTXT++;
323 SRCTXT++;
324 (s->real_len)++;
325 (s->pseudo_len)++;
326 return 0;
327 }
328
329 /* '%' managing */
330 initial_ptr = SRCTXT; /* save current pointer in case of incorrect */
331 /* 'decoding'. Points just after the '%' so the '%' */
332 /* won't be printed in any case, as required. */
333
334 /* flag */
335 flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
336
337 for (;; SRCTXT++) {
338 if (*SRCTXT == ' ')
339 flag_space = 1;
340 else if (*SRCTXT == '+')
341 flag_plus = 1;
342 else if (*SRCTXT == '-')
343 flag_minus = 1;
344 else if (*SRCTXT == '#')
345 flag_sharp = 1;
346 else if (*SRCTXT == '0')
347 flag_zero = 1;
348 else
349 break;
350 }
351
352 INCOHERENT_TEST(); /* here is the first test for end of string */
353
354 /* width */
355 if (*SRCTXT == '*') { /* width given by next argument */
356 SRCTXT++;
357 width = va_arg(s->vargs, int);
358 if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
359 width = 0x3fff;
360 } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
361 width = getint(&SRCTXT);
362 else
363 width = -1; /* no width specified */
364
365 INCOHERENT_TEST();
366
367 /* .prec */
368 if (*SRCTXT == '.') {
369 SRCTXT++;
370 if (*SRCTXT == '*') { /* .prec given by next argument */
371 SRCTXT++;
372 prec = va_arg(s->vargs, int);
373 if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
374 prec = 0x3fff;
375 } else { /* .prec given as ASCII number */
376 if (isdigit((unsigned char)*SRCTXT) == 0)
377 INCOHERENT();
378 prec = getint(&SRCTXT);
379 }
380 INCOHERENT_TEST();
381 } else
382 prec = -1; /* no .prec specified */
383
384 /* modifier */
385 switch (*SRCTXT) {
386 case 'L':
387 case 'h':
388 case 'l':
389 case 'z':
390 case 't':
391 modifier = *SRCTXT;
392 SRCTXT++;
393 if (modifier=='l' && *SRCTXT=='l') {
394 SRCTXT++;
395 modifier = 'L'; /* 'll' == 'L' long long == long double */
396 } /* only for compatibility ; not portable */
397 INCOHERENT_TEST();
398 break;
399 default:
400 modifier = -1; /* no modifier specified */
401 break;
402 }
403
404 /* type */
405 type = *SRCTXT;
406 if (strchr("diouxXfegEGcspn",type) == NULL)
407 INCOHERENT(); /* unknown type */
408 SRCTXT++;
409
410 /* rewrite format-string */
411 format_string[0] = '%';
412 format_ptr = &(format_string[1]);
413
414 if (flag_plus) {
415 *format_ptr = '+';
416 format_ptr++;
417 }
418 if (flag_minus) {
419 *format_ptr = '-';
420 format_ptr++;
421 }
422 if (flag_space) {
423 *format_ptr = ' ';
424 format_ptr++;
425 }
426 if (flag_sharp) {
427 *format_ptr = '#';
428 format_ptr++;
429 }
430 if (flag_zero) {
431 *format_ptr = '0';
432 format_ptr++;
433 } /* '0' *must* be the last one */
434
435 if (width != -1) {
436 sprintf(format_ptr, "%i", width);
437 format_ptr += strlen(format_ptr);
438 }
439
440 if (prec != -1) {
441 *format_ptr = '.';
442 format_ptr++;
443 sprintf(format_ptr, "%i", prec);
444 format_ptr += strlen(format_ptr);
445 }
446
447 if (modifier != -1) {
448 if (modifier == 'L' && strchr("diouxX",type) != NULL) {
449 *format_ptr = 'l';
450 format_ptr++;
451 *format_ptr = 'l';
452 format_ptr++;
453 } else {
454 *format_ptr = modifier;
455 format_ptr++;
456 }
457 }
458
459 *format_ptr = type;
460 format_ptr++;
461 *format_ptr = 0;
462
463 /* vague approximation of minimal length if width or prec are specified */
464 approx_width = width + prec;
465 if (approx_width < 0) /* because width == -1 and/or prec == -1 */
466 approx_width = 0;
467
468 switch (type) {
469 /* int */
470 case 'd':
471 case 'i':
472 case 'o':
473 case 'u':
474 case 'x':
475 case 'X':
476 switch (modifier) {
477 case -1 :
478 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
479 case 'L':
480 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
481 case 'l':
482 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
483 case 'h':
484 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
485 case 'z':
486 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
487 case 't':
488 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
489 /* 'int' instead of 'short int' because default promotion is 'int' */
490 default:
491 INCOHERENT();
492 }
493
494 /* char */
495 case 'c':
496 if (modifier != -1)
497 INCOHERENT();
498 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
499 /* 'int' instead of 'char' because default promotion is 'int' */
500
501 /* math */
502 case 'e':
503 case 'f':
504 case 'g':
505 case 'E':
506 case 'G':
507 switch (modifier) {
508 case -1 : /* because of default promotion, no modifier means 'l' */
509 case 'l':
510 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
511 case 'L':
512 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
513 default:
514 INCOHERENT();
515 }
516
517 /* string */
518 case 's':
519 return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
520
521 /* pointer */
522 case 'p':
523 if (modifier == -1)
524 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
525 INCOHERENT();
526
527 /* store */
528 case 'n':
529 if (modifier == -1) {
530 int * p;
531 p = va_arg(s->vargs, int *);
532 if (p != NULL) {
533 *p = s->pseudo_len;
534 return 0;
535 }
536 return EOF;
537 }
538 INCOHERENT();
539
540 } /* switch */
541
542 INCOHERENT(); /* unknown type */
543
544 #undef INCOHERENT
545 #undef INCOHERENT_TEST
546 #undef SRCTXT
547 #undef DESTTXT
548 }
549
550 /*
551 * Return value: number of *virtually* written characters
552 * EOF = error
553 */
core(xprintf_struct * s)554 static int core(xprintf_struct *s)
555 {
556 size_t save_len;
557 char *dummy_base;
558
559 /* basic checks */
560 if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
561 return EOF; /* error for example if value is (int)-10 */
562 s->maxlen--; /* because initial maxlen counts final 0 */
563 /* note: now 'maxlen' _can_ be zero */
564
565 if (s->src_string == NULL)
566 s->src_string = "(null)";
567
568 /* struct init and memory allocation */
569 s->buffer_base = NULL;
570 s->buffer_len = 0;
571 s->real_len = 0;
572 s->pseudo_len = 0;
573 if (realloc_buff(s, (size_t)0) == EOF)
574 return EOF;
575 s->dest_string = s->buffer_base;
576
577 /* process source string */
578 for (;;) {
579 /* up to end of source string */
580 if (*(s->src_string) == 0) {
581 *(s->dest_string) = '\0'; /* final NUL */
582 break;
583 }
584
585 if (dispatch(s) == EOF)
586 goto free_EOF;
587
588 /* up to end of dest string */
589 if (s->real_len >= s->maxlen) {
590 (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
591 break;
592 }
593 }
594
595 /* for (v)asnprintf */
596 dummy_base = s->buffer_base + s->real_len;
597 save_len = s->real_len;
598
599 /* process the remaining of source string to compute 'pseudo_len'. We
600 * overwrite again and again, starting at 'dummy_base' because we don't
601 * need the text, only char count. */
602 while(*(s->src_string) != 0) { /* up to end of source string */
603 s->real_len = 0;
604 s->dest_string = dummy_base;
605 if (dispatch(s) == EOF)
606 goto free_EOF;
607 }
608
609 s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
610 if (s->buffer_base == NULL)
611 return EOF; /* should rarely happen because we shrink the buffer */
612 return s->pseudo_len;
613
614 free_EOF:
615 free(s->buffer_base);
616 return EOF;
617 }
618
vasprintf(char ** ptr,const char * format_string,va_list vargs)619 int vasprintf(char **ptr, const char *format_string, va_list vargs)
620 {
621 xprintf_struct s;
622 int retval;
623
624 memset(&s, 0, sizeof(s));
625 s.src_string = format_string;
626 #ifdef va_copy
627 va_copy (s.vargs, vargs);
628 #else
629 # ifdef __va_copy
630 __va_copy (s.vargs, vargs);
631 # else
632 # ifdef WIN32
633 s.vargs = vargs;
634 # else
635 memcpy (&s.vargs, &vargs, sizeof (s.vargs));
636 # endif /* WIN32 */
637 # endif /* __va_copy */
638 #endif /* va_copy */
639 s.maxlen = (size_t)INT_MAX;
640
641 retval = core(&s);
642 va_end(s.vargs);
643 if (retval == EOF) {
644 *ptr = NULL;
645 return EOF;
646 }
647
648 *ptr = s.buffer_base;
649 return retval;
650 }
651