1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// Scanf/printf implementation for use in *Sanitizer interceptors. 10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html 11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html 12// with a few common GNU extensions. 13// 14//===----------------------------------------------------------------------===// 15 16#include <stdarg.h> 17 18static const char *parse_number(const char *p, int *out) { 19 *out = internal_atoll(p); 20 while (*p >= '0' && *p <= '9') 21 ++p; 22 return p; 23} 24 25static const char *maybe_parse_param_index(const char *p, int *out) { 26 // n$ 27 if (*p >= '0' && *p <= '9') { 28 int number; 29 const char *q = parse_number(p, &number); 30 CHECK(q); 31 if (*q == '$') { 32 *out = number; 33 p = q + 1; 34 } 35 } 36 37 // Otherwise, do not change p. This will be re-parsed later as the field 38 // width. 39 return p; 40} 41 42static bool char_is_one_of(char c, const char *s) { 43 return !!internal_strchr(s, c); 44} 45 46static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { 47 if (char_is_one_of(*p, "jztLq")) { 48 ll[0] = *p; 49 ++p; 50 } else if (*p == 'h') { 51 ll[0] = 'h'; 52 ++p; 53 if (*p == 'h') { 54 ll[1] = 'h'; 55 ++p; 56 } 57 } else if (*p == 'l') { 58 ll[0] = 'l'; 59 ++p; 60 if (*p == 'l') { 61 ll[1] = 'l'; 62 ++p; 63 } 64 } 65 return p; 66} 67 68// Returns true if the character is an integer conversion specifier. 69static bool format_is_integer_conv(char c) { 70 return char_is_one_of(c, "diouxXn"); 71} 72 73// Returns true if the character is an floating point conversion specifier. 74static bool format_is_float_conv(char c) { 75 return char_is_one_of(c, "aAeEfFgG"); 76} 77 78// Returns string output character size for string-like conversions, 79// or 0 if the conversion is invalid. 80static int format_get_char_size(char convSpecifier, 81 const char lengthModifier[2]) { 82 if (char_is_one_of(convSpecifier, "CS")) { 83 return sizeof(wchar_t); 84 } 85 86 if (char_is_one_of(convSpecifier, "cs[")) { 87 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') 88 return sizeof(wchar_t); 89 else if (lengthModifier[0] == '\0') 90 return sizeof(char); 91 } 92 93 return 0; 94} 95 96enum FormatStoreSize { 97 // Store size not known in advance; can be calculated as wcslen() of the 98 // destination buffer. 99 FSS_WCSLEN = -2, 100 // Store size not known in advance; can be calculated as strlen() of the 101 // destination buffer. 102 FSS_STRLEN = -1, 103 // Invalid conversion specifier. 104 FSS_INVALID = 0 105}; 106 107// Returns the memory size of a format directive (if >0), or a value of 108// FormatStoreSize. 109static int format_get_value_size(char convSpecifier, 110 const char lengthModifier[2], 111 bool promote_float) { 112 if (format_is_integer_conv(convSpecifier)) { 113 switch (lengthModifier[0]) { 114 case 'h': 115 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); 116 case 'l': 117 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); 118 case 'q': 119 return sizeof(long long); 120 case 'L': 121 return sizeof(long long); 122 case 'j': 123 return sizeof(INTMAX_T); 124 case 'z': 125 return sizeof(SIZE_T); 126 case 't': 127 return sizeof(PTRDIFF_T); 128 case 0: 129 return sizeof(int); 130 default: 131 return FSS_INVALID; 132 } 133 } 134 135 if (format_is_float_conv(convSpecifier)) { 136 switch (lengthModifier[0]) { 137 case 'L': 138 case 'q': 139 return sizeof(long double); 140 case 'l': 141 return lengthModifier[1] == 'l' ? sizeof(long double) 142 : sizeof(double); 143 case 0: 144 // Printf promotes floats to doubles but scanf does not 145 return promote_float ? sizeof(double) : sizeof(float); 146 default: 147 return FSS_INVALID; 148 } 149 } 150 151 if (convSpecifier == 'p') { 152 if (lengthModifier[0] != 0) 153 return FSS_INVALID; 154 return sizeof(void *); 155 } 156 157 return FSS_INVALID; 158} 159 160struct ScanfDirective { 161 int argIdx; // argument index, or -1 if not specified ("%n$") 162 int fieldWidth; 163 const char *begin; 164 const char *end; 165 bool suppressed; // suppress assignment ("*") 166 bool allocate; // allocate space ("m") 167 char lengthModifier[2]; 168 char convSpecifier; 169 bool maybeGnuMalloc; 170}; 171 172// Parse scanf format string. If a valid directive in encountered, it is 173// returned in dir. This function returns the pointer to the first 174// unprocessed character, or 0 in case of error. 175// In case of the end-of-string, a pointer to the closing \0 is returned. 176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, 177 ScanfDirective *dir) { 178 internal_memset(dir, 0, sizeof(*dir)); 179 dir->argIdx = -1; 180 181 while (*p) { 182 if (*p != '%') { 183 ++p; 184 continue; 185 } 186 dir->begin = p; 187 ++p; 188 // %% 189 if (*p == '%') { 190 ++p; 191 continue; 192 } 193 if (*p == '\0') { 194 return nullptr; 195 } 196 // %n$ 197 p = maybe_parse_param_index(p, &dir->argIdx); 198 CHECK(p); 199 // * 200 if (*p == '*') { 201 dir->suppressed = true; 202 ++p; 203 } 204 // Field width 205 if (*p >= '0' && *p <= '9') { 206 p = parse_number(p, &dir->fieldWidth); 207 CHECK(p); 208 if (dir->fieldWidth <= 0) // Width if at all must be non-zero 209 return nullptr; 210 } 211 // m 212 if (*p == 'm') { 213 dir->allocate = true; 214 ++p; 215 } 216 // Length modifier. 217 p = maybe_parse_length_modifier(p, dir->lengthModifier); 218 // Conversion specifier. 219 dir->convSpecifier = *p++; 220 // Consume %[...] expression. 221 if (dir->convSpecifier == '[') { 222 if (*p == '^') 223 ++p; 224 if (*p == ']') 225 ++p; 226 while (*p && *p != ']') 227 ++p; 228 if (*p == 0) 229 return nullptr; // unexpected end of string 230 // Consume the closing ']'. 231 ++p; 232 } 233 // This is unfortunately ambiguous between old GNU extension 234 // of %as, %aS and %a[...] and newer POSIX %a followed by 235 // letters s, S or [. 236 if (allowGnuMalloc && dir->convSpecifier == 'a' && 237 !dir->lengthModifier[0]) { 238 if (*p == 's' || *p == 'S') { 239 dir->maybeGnuMalloc = true; 240 ++p; 241 } else if (*p == '[') { 242 // Watch for %a[h-j%d], if % appears in the 243 // [...] range, then we need to give up, we don't know 244 // if scanf will parse it as POSIX %a [h-j %d ] or 245 // GNU allocation of string with range dh-j plus %. 246 const char *q = p + 1; 247 if (*q == '^') 248 ++q; 249 if (*q == ']') 250 ++q; 251 while (*q && *q != ']' && *q != '%') 252 ++q; 253 if (*q == 0 || *q == '%') 254 return nullptr; 255 p = q + 1; // Consume the closing ']'. 256 dir->maybeGnuMalloc = true; 257 } 258 } 259 dir->end = p; 260 break; 261 } 262 return p; 263} 264 265static int scanf_get_value_size(ScanfDirective *dir) { 266 if (dir->allocate) { 267 if (!char_is_one_of(dir->convSpecifier, "cCsS[")) 268 return FSS_INVALID; 269 return sizeof(char *); 270 } 271 272 if (dir->maybeGnuMalloc) { 273 if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) 274 return FSS_INVALID; 275 // This is ambiguous, so check the smaller size of char * (if it is 276 // a GNU extension of %as, %aS or %a[...]) and float (if it is 277 // POSIX %a followed by s, S or [ letters). 278 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); 279 } 280 281 if (char_is_one_of(dir->convSpecifier, "cCsS[")) { 282 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); 283 unsigned charSize = 284 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 285 if (charSize == 0) 286 return FSS_INVALID; 287 if (dir->fieldWidth == 0) { 288 if (!needsTerminator) 289 return charSize; 290 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 291 } 292 return (dir->fieldWidth + needsTerminator) * charSize; 293 } 294 295 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); 296} 297 298// Common part of *scanf interceptors. 299// Process format string and va_list, and report all store ranges. 300// Stops when "consuming" n_inputs input items. 301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, 302 const char *format, va_list aq) { 303 CHECK_GT(n_inputs, 0); 304 const char *p = format; 305 306 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 307 308 while (*p) { 309 ScanfDirective dir; 310 p = scanf_parse_next(p, allowGnuMalloc, &dir); 311 if (!p) 312 break; 313 if (dir.convSpecifier == 0) { 314 // This can only happen at the end of the format string. 315 CHECK_EQ(*p, 0); 316 break; 317 } 318 // Here the directive is valid. Do what it says. 319 if (dir.argIdx != -1) { 320 // Unsupported. 321 break; 322 } 323 if (dir.suppressed) 324 continue; 325 int size = scanf_get_value_size(&dir); 326 if (size == FSS_INVALID) { 327 Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n", 328 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); 329 break; 330 } 331 void *argp = va_arg(aq, void *); 332 if (dir.convSpecifier != 'n') 333 --n_inputs; 334 if (n_inputs < 0) 335 break; 336 if (size == FSS_STRLEN) { 337 size = internal_strlen((const char *)argp) + 1; 338 } else if (size == FSS_WCSLEN) { 339 // FIXME: actually use wcslen() to calculate it. 340 size = 0; 341 } 342 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 343 // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well. 344 if (dir.allocate) { 345 if (char *buf = *(char **)argp) { 346 if (dir.convSpecifier == 'c') 347 size = 1; 348 else if (dir.convSpecifier == 'C') 349 size = sizeof(wchar_t); 350 else if (dir.convSpecifier == 'S') 351 size = (internal_wcslen((wchar_t *)buf) + 1) * sizeof(wchar_t); 352 else // 's' or '[' 353 size = internal_strlen(buf) + 1; 354 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size); 355 } 356 } 357 } 358} 359 360#if SANITIZER_INTERCEPT_PRINTF 361 362struct PrintfDirective { 363 int fieldWidth; 364 int fieldPrecision; 365 int argIdx; // width argument index, or -1 if not specified ("%*n$") 366 int precisionIdx; // precision argument index, or -1 if not specified (".*n$") 367 const char *begin; 368 const char *end; 369 bool starredWidth; 370 bool starredPrecision; 371 char lengthModifier[2]; 372 char convSpecifier; 373}; 374 375static const char *maybe_parse_number(const char *p, int *out) { 376 if (*p >= '0' && *p <= '9') 377 p = parse_number(p, out); 378 return p; 379} 380 381static const char *maybe_parse_number_or_star(const char *p, int *out, 382 bool *star) { 383 if (*p == '*') { 384 *star = true; 385 ++p; 386 } else { 387 *star = false; 388 p = maybe_parse_number(p, out); 389 } 390 return p; 391} 392 393// Parse printf format string. Same as scanf_parse_next. 394static const char *printf_parse_next(const char *p, PrintfDirective *dir) { 395 internal_memset(dir, 0, sizeof(*dir)); 396 dir->argIdx = -1; 397 dir->precisionIdx = -1; 398 399 while (*p) { 400 if (*p != '%') { 401 ++p; 402 continue; 403 } 404 dir->begin = p; 405 ++p; 406 // %% 407 if (*p == '%') { 408 ++p; 409 continue; 410 } 411 if (*p == '\0') { 412 return nullptr; 413 } 414 // %n$ 415 p = maybe_parse_param_index(p, &dir->precisionIdx); 416 CHECK(p); 417 // Flags 418 while (char_is_one_of(*p, "'-+ #0")) { 419 ++p; 420 } 421 // Field width 422 p = maybe_parse_number_or_star(p, &dir->fieldWidth, 423 &dir->starredWidth); 424 if (!p) 425 return nullptr; 426 // Precision 427 if (*p == '.') { 428 ++p; 429 // Actual precision is optional (surprise!) 430 p = maybe_parse_number_or_star(p, &dir->fieldPrecision, 431 &dir->starredPrecision); 432 if (!p) 433 return nullptr; 434 // m$ 435 if (dir->starredPrecision) { 436 p = maybe_parse_param_index(p, &dir->precisionIdx); 437 CHECK(p); 438 } 439 } 440 // Length modifier. 441 p = maybe_parse_length_modifier(p, dir->lengthModifier); 442 // Conversion specifier. 443 dir->convSpecifier = *p++; 444 dir->end = p; 445 break; 446 } 447 return p; 448} 449 450static int printf_get_value_size(PrintfDirective *dir) { 451 if (char_is_one_of(dir->convSpecifier, "cCsS")) { 452 unsigned charSize = 453 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 454 if (charSize == 0) 455 return FSS_INVALID; 456 if (char_is_one_of(dir->convSpecifier, "sS")) { 457 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 458 } 459 return charSize; 460 } 461 462 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); 463} 464 465#define SKIP_SCALAR_ARG(aq, convSpecifier, size) \ 466 do { \ 467 if (format_is_float_conv(convSpecifier)) { \ 468 switch (size) { \ 469 case 8: \ 470 va_arg(*aq, double); \ 471 break; \ 472 case 12: \ 473 va_arg(*aq, long double); \ 474 break; \ 475 case 16: \ 476 va_arg(*aq, long double); \ 477 break; \ 478 default: \ 479 Report("WARNING: unexpected floating-point arg size" \ 480 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \ 481 return; \ 482 } \ 483 } else { \ 484 switch (size) { \ 485 case 1: \ 486 case 2: \ 487 case 4: \ 488 va_arg(*aq, u32); \ 489 break; \ 490 case 8: \ 491 va_arg(*aq, u64); \ 492 break; \ 493 default: \ 494 Report("WARNING: unexpected arg size" \ 495 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \ 496 return; \ 497 } \ 498 } \ 499 } while (0) 500 501// Common part of *printf interceptors. 502// Process format string and va_list, and report all load ranges. 503static void printf_common(void *ctx, const char *format, va_list aq) { 504 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 505 506 const char *p = format; 507 508 while (*p) { 509 PrintfDirective dir; 510 p = printf_parse_next(p, &dir); 511 if (!p) 512 break; 513 if (dir.convSpecifier == 0) { 514 // This can only happen at the end of the format string. 515 CHECK_EQ(*p, 0); 516 break; 517 } 518 // Here the directive is valid. Do what it says. 519 if (dir.argIdx != -1 || dir.precisionIdx != -1) { 520 // Unsupported. 521 break; 522 } 523 if (dir.starredWidth) { 524 // Dynamic width 525 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 526 } 527 if (dir.starredPrecision) { 528 // Dynamic precision 529 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 530 } 531 // %m does not require an argument: strlen(errno). 532 if (dir.convSpecifier == 'm') 533 continue; 534 int size = printf_get_value_size(&dir); 535 if (size == FSS_INVALID) { 536 static int ReportedOnce; 537 if (!ReportedOnce++) 538 Report( 539 "%s: WARNING: unexpected format specifier in printf " 540 "interceptor: %.*s (reported once per process)\n", 541 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); 542 break; 543 } 544 if (dir.convSpecifier == 'n') { 545 void *argp = va_arg(aq, void *); 546 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 547 continue; 548 } else if (size == FSS_STRLEN) { 549 if (void *argp = va_arg(aq, void *)) { 550 uptr len; 551 if (dir.starredPrecision) { 552 // FIXME: properly support starred precision for strings. 553 len = 0; 554 } else if (dir.fieldPrecision > 0) { 555 // Won't read more than "precision" symbols. 556 len = internal_strnlen((const char *)argp, dir.fieldPrecision); 557 if (len < (uptr)dir.fieldPrecision) 558 len++; 559 } else { 560 // Whole string will be accessed. 561 len = internal_strlen((const char *)argp) + 1; 562 } 563 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len); 564 } 565 } else if (size == FSS_WCSLEN) { 566 if (void *argp = va_arg(aq, void *)) { 567 // FIXME: Properly support wide-character strings (via wcsrtombs). 568 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0); 569 } 570 } else { 571 // Skip non-pointer args 572 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); 573 } 574 } 575} 576 577#endif // SANITIZER_INTERCEPT_PRINTF 578