1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Scanf/printf implementation for use in *Sanitizer interceptors.
10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
12// with a few common GNU extensions.
13//
14//===----------------------------------------------------------------------===//
15
16#include <stdarg.h>
17
18static const char *parse_number(const char *p, int *out) {
19  *out = internal_atoll(p);
20  while (*p >= '0' && *p <= '9')
21    ++p;
22  return p;
23}
24
25static const char *maybe_parse_param_index(const char *p, int *out) {
26  // n$
27  if (*p >= '0' && *p <= '9') {
28    int number;
29    const char *q = parse_number(p, &number);
30    CHECK(q);
31    if (*q == '$') {
32      *out = number;
33      p = q + 1;
34    }
35  }
36
37  // Otherwise, do not change p. This will be re-parsed later as the field
38  // width.
39  return p;
40}
41
42static bool char_is_one_of(char c, const char *s) {
43  return !!internal_strchr(s, c);
44}
45
// Parses an optional length modifier at p.
// Recognized modifiers are the single characters j, z, t, L, q, h, l and the
// doubled forms hh and ll. The modifier characters are stored into ll[0] (and
// ll[1] for the doubled forms); entries that are not written keep whatever
// value the caller put there. Returns the pointer just past the modifier, or
// p unchanged when there is no modifier.
// The end-of-string terminator is never treated as a modifier, so the
// returned pointer never moves past the end of the format string.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  const char first = *p;
  switch (first) {
  case 'j':
  case 'z':
  case 't':
  case 'L':
  case 'q':
    ll[0] = first;
    return p + 1;
  case 'h':
  case 'l':
    ll[0] = first;
    // "hh" and "ll": the same letter repeated.
    if (p[1] == first) {
      ll[1] = first;
      return p + 2;
    }
    return p + 1;
  default:
    // No length modifier (this includes the terminating '\0').
    return p;
  }
}
67
68// Returns true if the character is an integer conversion specifier.
69static bool format_is_integer_conv(char c) {
70  return char_is_one_of(c, "diouxXn");
71}
72
73// Returns true if the character is an floating point conversion specifier.
74static bool format_is_float_conv(char c) {
75  return char_is_one_of(c, "aAeEfFgG");
76}
77
78// Returns string output character size for string-like conversions,
79// or 0 if the conversion is invalid.
80static int format_get_char_size(char convSpecifier,
81                                const char lengthModifier[2]) {
82  if (char_is_one_of(convSpecifier, "CS")) {
83    return sizeof(wchar_t);
84  }
85
86  if (char_is_one_of(convSpecifier, "cs[")) {
87    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
88      return sizeof(wchar_t);
89    else if (lengthModifier[0] == '\0')
90      return sizeof(char);
91  }
92
93  return 0;
94}
95
// Special (non-positive) results of the *_get_value_size() helpers. Positive
// results of those helpers are plain byte counts; the values below tell the
// caller that the size is either invalid or must be derived from the argument
// itself.
enum FormatStoreSize {
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Invalid conversion specifier.
  FSS_INVALID = 0
};
106
107// Returns the memory size of a format directive (if >0), or a value of
108// FormatStoreSize.
109static int format_get_value_size(char convSpecifier,
110                                 const char lengthModifier[2],
111                                 bool promote_float) {
112  if (format_is_integer_conv(convSpecifier)) {
113    switch (lengthModifier[0]) {
114    case 'h':
115      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
116    case 'l':
117      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
118    case 'q':
119      return sizeof(long long);
120    case 'L':
121      return sizeof(long long);
122    case 'j':
123      return sizeof(INTMAX_T);
124    case 'z':
125      return sizeof(SIZE_T);
126    case 't':
127      return sizeof(PTRDIFF_T);
128    case 0:
129      return sizeof(int);
130    default:
131      return FSS_INVALID;
132    }
133  }
134
135  if (format_is_float_conv(convSpecifier)) {
136    switch (lengthModifier[0]) {
137    case 'L':
138    case 'q':
139      return sizeof(long double);
140    case 'l':
141      return lengthModifier[1] == 'l' ? sizeof(long double)
142                                           : sizeof(double);
143    case 0:
144      // Printf promotes floats to doubles but scanf does not
145      return promote_float ? sizeof(double) : sizeof(float);
146    default:
147      return FSS_INVALID;
148    }
149  }
150
151  if (convSpecifier == 'p') {
152    if (lengthModifier[0] != 0)
153      return FSS_INVALID;
154    return sizeof(void *);
155  }
156
157  return FSS_INVALID;
158}
159
// One parsed scanf conversion directive, as filled in by scanf_parse_next().
// All fields are zero-initialized by the parser before parsing, except
// argIdx, which starts at -1.
struct ScanfDirective {
  int argIdx; // argument index, or -1 if not specified ("%n$")
  int fieldWidth; // maximum field width, or 0 if none was given
  const char *begin; // points at the '%' that starts the directive
  const char *end; // points just past the last character of the directive
  bool suppressed; // suppress assignment ("*")
  bool allocate;   // allocate space ("m")
  char lengthModifier[2]; // e.g. {'l', 'l'} for "ll"; unused entries are 0
  char convSpecifier; // conversion character, e.g. 'd', 's' or '['
  bool maybeGnuMalloc; // "%a" followed by s, S or [...]: may be GNU "%as"
};
171
172// Parse scanf format string. If a valid directive in encountered, it is
173// returned in dir. This function returns the pointer to the first
174// unprocessed character, or 0 in case of error.
175// In case of the end-of-string, a pointer to the closing \0 is returned.
176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
177                                    ScanfDirective *dir) {
178  internal_memset(dir, 0, sizeof(*dir));
179  dir->argIdx = -1;
180
181  while (*p) {
182    if (*p != '%') {
183      ++p;
184      continue;
185    }
186    dir->begin = p;
187    ++p;
188    // %%
189    if (*p == '%') {
190      ++p;
191      continue;
192    }
193    if (*p == '\0') {
194      return nullptr;
195    }
196    // %n$
197    p = maybe_parse_param_index(p, &dir->argIdx);
198    CHECK(p);
199    // *
200    if (*p == '*') {
201      dir->suppressed = true;
202      ++p;
203    }
204    // Field width
205    if (*p >= '0' && *p <= '9') {
206      p = parse_number(p, &dir->fieldWidth);
207      CHECK(p);
208      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
209        return nullptr;
210    }
211    // m
212    if (*p == 'm') {
213      dir->allocate = true;
214      ++p;
215    }
216    // Length modifier.
217    p = maybe_parse_length_modifier(p, dir->lengthModifier);
218    // Conversion specifier.
219    dir->convSpecifier = *p++;
220    // Consume %[...] expression.
221    if (dir->convSpecifier == '[') {
222      if (*p == '^')
223        ++p;
224      if (*p == ']')
225        ++p;
226      while (*p && *p != ']')
227        ++p;
228      if (*p == 0)
229        return nullptr; // unexpected end of string
230                        // Consume the closing ']'.
231      ++p;
232    }
233    // This is unfortunately ambiguous between old GNU extension
234    // of %as, %aS and %a[...] and newer POSIX %a followed by
235    // letters s, S or [.
236    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
237        !dir->lengthModifier[0]) {
238      if (*p == 's' || *p == 'S') {
239        dir->maybeGnuMalloc = true;
240        ++p;
241      } else if (*p == '[') {
242        // Watch for %a[h-j%d], if % appears in the
243        // [...] range, then we need to give up, we don't know
244        // if scanf will parse it as POSIX %a [h-j %d ] or
245        // GNU allocation of string with range dh-j plus %.
246        const char *q = p + 1;
247        if (*q == '^')
248          ++q;
249        if (*q == ']')
250          ++q;
251        while (*q && *q != ']' && *q != '%')
252          ++q;
253        if (*q == 0 || *q == '%')
254          return nullptr;
255        p = q + 1; // Consume the closing ']'.
256        dir->maybeGnuMalloc = true;
257      }
258    }
259    dir->end = p;
260    break;
261  }
262  return p;
263}
264
265static int scanf_get_value_size(ScanfDirective *dir) {
266  if (dir->allocate) {
267    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
268      return FSS_INVALID;
269    return sizeof(char *);
270  }
271
272  if (dir->maybeGnuMalloc) {
273    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
274      return FSS_INVALID;
275    // This is ambiguous, so check the smaller size of char * (if it is
276    // a GNU extension of %as, %aS or %a[...]) and float (if it is
277    // POSIX %a followed by s, S or [ letters).
278    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
279  }
280
281  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
282    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
283    unsigned charSize =
284        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
285    if (charSize == 0)
286      return FSS_INVALID;
287    if (dir->fieldWidth == 0) {
288      if (!needsTerminator)
289        return charSize;
290      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
291    }
292    return (dir->fieldWidth + needsTerminator) * charSize;
293  }
294
295  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
296}
297
298// Common part of *scanf interceptors.
299// Process format string and va_list, and report all store ranges.
300// Stops when "consuming" n_inputs input items.
301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
302                         const char *format, va_list aq) {
303  CHECK_GT(n_inputs, 0);
304  const char *p = format;
305
306  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
307
308  while (*p) {
309    ScanfDirective dir;
310    p = scanf_parse_next(p, allowGnuMalloc, &dir);
311    if (!p)
312      break;
313    if (dir.convSpecifier == 0) {
314      // This can only happen at the end of the format string.
315      CHECK_EQ(*p, 0);
316      break;
317    }
318    // Here the directive is valid. Do what it says.
319    if (dir.argIdx != -1) {
320      // Unsupported.
321      break;
322    }
323    if (dir.suppressed)
324      continue;
325    int size = scanf_get_value_size(&dir);
326    if (size == FSS_INVALID) {
327      Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
328             SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
329      break;
330    }
331    void *argp = va_arg(aq, void *);
332    if (dir.convSpecifier != 'n')
333      --n_inputs;
334    if (n_inputs < 0)
335      break;
336    if (size == FSS_STRLEN) {
337      size = internal_strlen((const char *)argp) + 1;
338    } else if (size == FSS_WCSLEN) {
339      // FIXME: actually use wcslen() to calculate it.
340      size = 0;
341    }
342    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
343    // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well.
344    if (dir.allocate) {
345      if (char *buf = *(char **)argp) {
346        if (dir.convSpecifier == 'c')
347          size = 1;
348        else if (dir.convSpecifier == 'C')
349          size = sizeof(wchar_t);
350        else if (dir.convSpecifier == 'S')
351          size = (internal_wcslen((wchar_t *)buf) + 1) * sizeof(wchar_t);
352        else  // 's' or '['
353          size = internal_strlen(buf) + 1;
354        COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
355      }
356    }
357  }
358}
359
360#if SANITIZER_INTERCEPT_PRINTF
361
// One parsed printf conversion directive, as filled in by printf_parse_next().
// All fields are zero-initialized by the parser before parsing, except the
// two index fields, which start at -1.
struct PrintfDirective {
  int fieldWidth; // literal field width, or 0 if none was given
  int fieldPrecision; // literal precision, or 0 if none was given
  int argIdx; // width argument index, or -1 if not specified ("%*n$")
  int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
  const char *begin; // points at the '%' that starts the directive
  const char *end; // points just past the conversion specifier
  bool starredWidth; // width is taken from an argument ("*")
  bool starredPrecision; // precision is taken from an argument (".*")
  char lengthModifier[2]; // e.g. {'l', 'l'} for "ll"; unused entries are 0
  char convSpecifier; // conversion character, e.g. 'd' or 's'
};
374
375static const char *maybe_parse_number(const char *p, int *out) {
376  if (*p >= '0' && *p <= '9')
377    p = parse_number(p, out);
378  return p;
379}
380
381static const char *maybe_parse_number_or_star(const char *p, int *out,
382                                              bool *star) {
383  if (*p == '*') {
384    *star = true;
385    ++p;
386  } else {
387    *star = false;
388    p = maybe_parse_number(p, out);
389  }
390  return p;
391}
392
393// Parse printf format string. Same as scanf_parse_next.
394static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
395  internal_memset(dir, 0, sizeof(*dir));
396  dir->argIdx = -1;
397  dir->precisionIdx = -1;
398
399  while (*p) {
400    if (*p != '%') {
401      ++p;
402      continue;
403    }
404    dir->begin = p;
405    ++p;
406    // %%
407    if (*p == '%') {
408      ++p;
409      continue;
410    }
411    if (*p == '\0') {
412      return nullptr;
413    }
414    // %n$
415    p = maybe_parse_param_index(p, &dir->precisionIdx);
416    CHECK(p);
417    // Flags
418    while (char_is_one_of(*p, "'-+ #0")) {
419      ++p;
420    }
421    // Field width
422    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
423                                   &dir->starredWidth);
424    if (!p)
425      return nullptr;
426    // Precision
427    if (*p == '.') {
428      ++p;
429      // Actual precision is optional (surprise!)
430      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
431                                     &dir->starredPrecision);
432      if (!p)
433        return nullptr;
434      // m$
435      if (dir->starredPrecision) {
436        p = maybe_parse_param_index(p, &dir->precisionIdx);
437        CHECK(p);
438      }
439    }
440    // Length modifier.
441    p = maybe_parse_length_modifier(p, dir->lengthModifier);
442    // Conversion specifier.
443    dir->convSpecifier = *p++;
444    dir->end = p;
445    break;
446  }
447  return p;
448}
449
450static int printf_get_value_size(PrintfDirective *dir) {
451  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
452    unsigned charSize =
453        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
454    if (charSize == 0)
455      return FSS_INVALID;
456    if (char_is_one_of(dir->convSpecifier, "sS")) {
457      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
458    }
459    return charSize;
460  }
461
462  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
463}
464
// Advances the va_list that `aq` points to past one by-value argument.
// The type fetched depends on whether convSpecifier is a floating-point
// conversion and on `size` (in bytes):
//   floating point: 8 -> double, 12 or 16 -> long double
//   everything else: 1, 2 or 4 -> u32, 8 -> u64
// For any other size a warning is reported and the macro RETURNS from the
// enclosing function, so it may only be used inside functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                              \
  do {                                                                        \
    if (!format_is_float_conv(convSpecifier)) {                               \
      switch (size) {                                                         \
      case 1:                                                                 \
      case 2:                                                                 \
      case 4:                                                                 \
        va_arg(*(aq), u32);                                                   \
        break;                                                                \
      case 8:                                                                 \
        va_arg(*(aq), u64);                                                   \
        break;                                                                \
      default:                                                                \
        Report("WARNING: unexpected arg size"                                 \
               " in printf interceptor: %zu\n", static_cast<uptr>(size));     \
        return;                                                               \
      }                                                                       \
    } else {                                                                  \
      switch (size) {                                                         \
      case 8:                                                                 \
        va_arg(*(aq), double);                                                \
        break;                                                                \
      case 12:                                                                \
      case 16:                                                                \
        va_arg(*(aq), long double);                                           \
        break;                                                                \
      default:                                                                \
        Report("WARNING: unexpected floating-point arg size"                  \
               " in printf interceptor: %zu\n", static_cast<uptr>(size));     \
        return;                                                               \
      }                                                                       \
    }                                                                         \
  } while (0)
500
501// Common part of *printf interceptors.
502// Process format string and va_list, and report all load ranges.
503static void printf_common(void *ctx, const char *format, va_list aq) {
504  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
505
506  const char *p = format;
507
508  while (*p) {
509    PrintfDirective dir;
510    p = printf_parse_next(p, &dir);
511    if (!p)
512      break;
513    if (dir.convSpecifier == 0) {
514      // This can only happen at the end of the format string.
515      CHECK_EQ(*p, 0);
516      break;
517    }
518    // Here the directive is valid. Do what it says.
519    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
520      // Unsupported.
521      break;
522    }
523    if (dir.starredWidth) {
524      // Dynamic width
525      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
526    }
527    if (dir.starredPrecision) {
528      // Dynamic precision
529      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
530    }
531    // %m does not require an argument: strlen(errno).
532    if (dir.convSpecifier == 'm')
533      continue;
534    int size = printf_get_value_size(&dir);
535    if (size == FSS_INVALID) {
536      static int ReportedOnce;
537      if (!ReportedOnce++)
538        Report(
539            "%s: WARNING: unexpected format specifier in printf "
540            "interceptor: %.*s (reported once per process)\n",
541            SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
542      break;
543    }
544    if (dir.convSpecifier == 'n') {
545      void *argp = va_arg(aq, void *);
546      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
547      continue;
548    } else if (size == FSS_STRLEN) {
549      if (void *argp = va_arg(aq, void *)) {
550        uptr len;
551        if (dir.starredPrecision) {
552          // FIXME: properly support starred precision for strings.
553          len = 0;
554        } else if (dir.fieldPrecision > 0) {
555          // Won't read more than "precision" symbols.
556          len = internal_strnlen((const char *)argp, dir.fieldPrecision);
557          if (len < (uptr)dir.fieldPrecision)
558            len++;
559        } else {
560          // Whole string will be accessed.
561          len = internal_strlen((const char *)argp) + 1;
562        }
563        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len);
564      }
565    } else if (size == FSS_WCSLEN) {
566      if (void *argp = va_arg(aq, void *)) {
567        // FIXME: Properly support wide-character strings (via wcsrtombs).
568        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0);
569      }
570    } else {
571      // Skip non-pointer args
572      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
573    }
574  }
575}
576
577#endif // SANITIZER_INTERCEPT_PRINTF
578