1 //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handling of format string in scanf and friends. The structure of format 10 // strings for fscanf() are described in C99 7.19.6.2. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/FormatString.h" 15 #include "FormatStringParsing.h" 16 #include "clang/Basic/TargetInfo.h" 17 18 using clang::analyze_format_string::ArgType; 19 using clang::analyze_format_string::FormatStringHandler; 20 using clang::analyze_format_string::LengthModifier; 21 using clang::analyze_format_string::OptionalAmount; 22 using clang::analyze_format_string::ConversionSpecifier; 23 using clang::analyze_scanf::ScanfConversionSpecifier; 24 using clang::analyze_scanf::ScanfSpecifier; 25 using clang::UpdateOnReturn; 26 using namespace clang; 27 28 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 29 ScanfSpecifierResult; 30 31 static bool ParseScanList(FormatStringHandler &H, 32 ScanfConversionSpecifier &CS, 33 const char *&Beg, const char *E) { 34 const char *I = Beg; 35 const char *start = I - 1; 36 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 37 38 // No more characters? 39 if (I == E) { 40 H.HandleIncompleteScanList(start, I); 41 return true; 42 } 43 44 // Special case: ']' is the first character. 45 if (*I == ']') { 46 if (++I == E) { 47 H.HandleIncompleteScanList(start, I - 1); 48 return true; 49 } 50 } 51 52 // Special case: "^]" are the first characters. 53 if (I + 1 != E && I[0] == '^' && I[1] == ']') { 54 I += 2; 55 if (I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 // Look for a ']' character which denotes the end of the scan list. 62 while (*I != ']') { 63 if (++I == E) { 64 H.HandleIncompleteScanList(start, I - 1); 65 return true; 66 } 67 } 68 69 CS.setEndScanList(I); 70 return false; 71 } 72 73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 74 // We can possibly refactor. 75 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 76 const char *&Beg, 77 const char *E, 78 unsigned &argIndex, 79 const LangOptions &LO, 80 const TargetInfo &Target) { 81 using namespace clang::analyze_format_string; 82 using namespace clang::analyze_scanf; 83 const char *I = Beg; 84 const char *Start = nullptr; 85 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 86 87 // Look for a '%' character that indicates the start of a format specifier. 88 for ( ; I != E ; ++I) { 89 char c = *I; 90 if (c == '\0') { 91 // Detect spurious null characters, which are likely errors. 92 H.HandleNullChar(I); 93 return true; 94 } 95 if (c == '%') { 96 Start = I++; // Record the start of the format specifier. 97 break; 98 } 99 } 100 101 // No format specifier found? 102 if (!Start) 103 return false; 104 105 if (I == E) { 106 // No more characters left? 107 H.HandleIncompleteSpecifier(Start, E - Start); 108 return true; 109 } 110 111 ScanfSpecifier FS; 112 if (ParseArgPosition(H, FS, Start, I, E)) 113 return true; 114 115 if (I == E) { 116 // No more characters left? 117 H.HandleIncompleteSpecifier(Start, E - Start); 118 return true; 119 } 120 121 // Look for '*' flag if it is present. 122 if (*I == '*') { 123 FS.setSuppressAssignment(I); 124 if (++I == E) { 125 H.HandleIncompleteSpecifier(Start, E - Start); 126 return true; 127 } 128 } 129 130 // Look for the field width (if any). Unlike printf, this is either 131 // a fixed integer or isn't present. 132 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 133 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 134 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 135 FS.setFieldWidth(Amt); 136 137 if (I == E) { 138 // No more characters left? 139 H.HandleIncompleteSpecifier(Start, E - Start); 140 return true; 141 } 142 } 143 144 // Look for the length modifier. 145 if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) { 146 // No more characters left? 147 H.HandleIncompleteSpecifier(Start, E - Start); 148 return true; 149 } 150 151 // Detect spurious null characters, which are likely errors. 152 if (*I == '\0') { 153 H.HandleNullChar(I); 154 return true; 155 } 156 157 // Finally, look for the conversion specifier. 158 const char *conversionPosition = I++; 159 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 160 switch (*conversionPosition) { 161 default: 162 break; 163 case '%': k = ConversionSpecifier::PercentArg; break; 164 case 'b': k = ConversionSpecifier::bArg; break; 165 case 'A': k = ConversionSpecifier::AArg; break; 166 case 'E': k = ConversionSpecifier::EArg; break; 167 case 'F': k = ConversionSpecifier::FArg; break; 168 case 'G': k = ConversionSpecifier::GArg; break; 169 case 'X': k = ConversionSpecifier::XArg; break; 170 case 'a': k = ConversionSpecifier::aArg; break; 171 case 'd': k = ConversionSpecifier::dArg; break; 172 case 'e': k = ConversionSpecifier::eArg; break; 173 case 'f': k = ConversionSpecifier::fArg; break; 174 case 'g': k = ConversionSpecifier::gArg; break; 175 case 'i': k = ConversionSpecifier::iArg; break; 176 case 'n': k = ConversionSpecifier::nArg; break; 177 case 'c': k = ConversionSpecifier::cArg; break; 178 case 'C': k = ConversionSpecifier::CArg; break; 179 case 'S': k = ConversionSpecifier::SArg; break; 180 case '[': k = ConversionSpecifier::ScanListArg; break; 181 case 'u': k = ConversionSpecifier::uArg; break; 182 case 'x': k = ConversionSpecifier::xArg; break; 183 case 'o': k = ConversionSpecifier::oArg; break; 184 case 's': k = ConversionSpecifier::sArg; break; 185 case 'p': k = ConversionSpecifier::pArg; break; 186 // Apple extensions 187 // Apple-specific 188 case 'D': 189 if (Target.getTriple().isOSDarwin()) 190 k = ConversionSpecifier::DArg; 191 break; 192 case 'O': 193 if (Target.getTriple().isOSDarwin()) 194 k = ConversionSpecifier::OArg; 195 break; 196 case 'U': 197 if (Target.getTriple().isOSDarwin()) 198 k = ConversionSpecifier::UArg; 199 break; 200 } 201 ScanfConversionSpecifier CS(conversionPosition, k); 202 if (k == ScanfConversionSpecifier::ScanListArg) { 203 if (ParseScanList(H, CS, I, E)) 204 return true; 205 } 206 FS.setConversionSpecifier(CS); 207 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 208 && !FS.usesPositionalArg()) 209 FS.setArgIndex(argIndex++); 210 211 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 212 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 213 214 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 215 unsigned Len = I - Beg; 216 if (ParseUTF8InvalidSpecifier(Beg, E, Len)) { 217 CS.setEndScanList(Beg + Len); 218 FS.setConversionSpecifier(CS); 219 } 220 // Assume the conversion takes one argument. 221 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); 222 } 223 return ScanfSpecifierResult(Start, FS); 224 } 225 226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 227 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 228 229 if (!CS.consumesDataArgument()) 230 return ArgType::Invalid(); 231 232 switch(CS.getKind()) { 233 // Signed int. 234 case ConversionSpecifier::dArg: 235 case ConversionSpecifier::DArg: 236 case ConversionSpecifier::iArg: 237 switch (LM.getKind()) { 238 case LengthModifier::None: 239 return ArgType::PtrTo(Ctx.IntTy); 240 case LengthModifier::AsChar: 241 return ArgType::PtrTo(ArgType::AnyCharTy); 242 case LengthModifier::AsShort: 243 return ArgType::PtrTo(Ctx.ShortTy); 244 case LengthModifier::AsLong: 245 return ArgType::PtrTo(Ctx.LongTy); 246 case LengthModifier::AsLongLong: 247 case LengthModifier::AsQuad: 248 return ArgType::PtrTo(Ctx.LongLongTy); 249 case LengthModifier::AsInt64: 250 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 251 case LengthModifier::AsIntMax: 252 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 253 case LengthModifier::AsSizeT: 254 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 255 case LengthModifier::AsPtrDiff: 256 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 257 case LengthModifier::AsLongDouble: 258 // GNU extension. 259 return ArgType::PtrTo(Ctx.LongLongTy); 260 case LengthModifier::AsAllocate: 261 case LengthModifier::AsMAllocate: 262 case LengthModifier::AsInt32: 263 case LengthModifier::AsInt3264: 264 case LengthModifier::AsWide: 265 case LengthModifier::AsShortLong: 266 return ArgType::Invalid(); 267 } 268 llvm_unreachable("Unsupported LengthModifier Type"); 269 270 // Unsigned int. 271 case ConversionSpecifier::bArg: 272 case ConversionSpecifier::oArg: 273 case ConversionSpecifier::OArg: 274 case ConversionSpecifier::uArg: 275 case ConversionSpecifier::UArg: 276 case ConversionSpecifier::xArg: 277 case ConversionSpecifier::XArg: 278 switch (LM.getKind()) { 279 case LengthModifier::None: 280 return ArgType::PtrTo(Ctx.UnsignedIntTy); 281 case LengthModifier::AsChar: 282 return ArgType::PtrTo(Ctx.UnsignedCharTy); 283 case LengthModifier::AsShort: 284 return ArgType::PtrTo(Ctx.UnsignedShortTy); 285 case LengthModifier::AsLong: 286 return ArgType::PtrTo(Ctx.UnsignedLongTy); 287 case LengthModifier::AsLongLong: 288 case LengthModifier::AsQuad: 289 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 290 case LengthModifier::AsInt64: 291 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 292 case LengthModifier::AsIntMax: 293 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 294 case LengthModifier::AsSizeT: 295 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 296 case LengthModifier::AsPtrDiff: 297 return ArgType::PtrTo( 298 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); 299 case LengthModifier::AsLongDouble: 300 // GNU extension. 301 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 302 case LengthModifier::AsAllocate: 303 case LengthModifier::AsMAllocate: 304 case LengthModifier::AsInt32: 305 case LengthModifier::AsInt3264: 306 case LengthModifier::AsWide: 307 case LengthModifier::AsShortLong: 308 return ArgType::Invalid(); 309 } 310 llvm_unreachable("Unsupported LengthModifier Type"); 311 312 // Float. 313 case ConversionSpecifier::aArg: 314 case ConversionSpecifier::AArg: 315 case ConversionSpecifier::eArg: 316 case ConversionSpecifier::EArg: 317 case ConversionSpecifier::fArg: 318 case ConversionSpecifier::FArg: 319 case ConversionSpecifier::gArg: 320 case ConversionSpecifier::GArg: 321 switch (LM.getKind()) { 322 case LengthModifier::None: 323 return ArgType::PtrTo(Ctx.FloatTy); 324 case LengthModifier::AsLong: 325 return ArgType::PtrTo(Ctx.DoubleTy); 326 case LengthModifier::AsLongDouble: 327 return ArgType::PtrTo(Ctx.LongDoubleTy); 328 default: 329 return ArgType::Invalid(); 330 } 331 332 // Char, string and scanlist. 333 case ConversionSpecifier::cArg: 334 case ConversionSpecifier::sArg: 335 case ConversionSpecifier::ScanListArg: 336 switch (LM.getKind()) { 337 case LengthModifier::None: 338 return ArgType::PtrTo(ArgType::AnyCharTy); 339 case LengthModifier::AsLong: 340 case LengthModifier::AsWide: 341 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 342 case LengthModifier::AsAllocate: 343 case LengthModifier::AsMAllocate: 344 return ArgType::PtrTo(ArgType::CStrTy); 345 case LengthModifier::AsShort: 346 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 347 return ArgType::PtrTo(ArgType::AnyCharTy); 348 [[fallthrough]]; 349 default: 350 return ArgType::Invalid(); 351 } 352 case ConversionSpecifier::CArg: 353 case ConversionSpecifier::SArg: 354 // FIXME: Mac OS X specific? 355 switch (LM.getKind()) { 356 case LengthModifier::None: 357 case LengthModifier::AsWide: 358 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 359 case LengthModifier::AsAllocate: 360 case LengthModifier::AsMAllocate: 361 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 362 case LengthModifier::AsShort: 363 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 364 return ArgType::PtrTo(ArgType::AnyCharTy); 365 [[fallthrough]]; 366 default: 367 return ArgType::Invalid(); 368 } 369 370 // Pointer. 371 case ConversionSpecifier::pArg: 372 return ArgType::PtrTo(ArgType::CPointerTy); 373 374 // Write-back. 375 case ConversionSpecifier::nArg: 376 switch (LM.getKind()) { 377 case LengthModifier::None: 378 return ArgType::PtrTo(Ctx.IntTy); 379 case LengthModifier::AsChar: 380 return ArgType::PtrTo(Ctx.SignedCharTy); 381 case LengthModifier::AsShort: 382 return ArgType::PtrTo(Ctx.ShortTy); 383 case LengthModifier::AsLong: 384 return ArgType::PtrTo(Ctx.LongTy); 385 case LengthModifier::AsLongLong: 386 case LengthModifier::AsQuad: 387 return ArgType::PtrTo(Ctx.LongLongTy); 388 case LengthModifier::AsInt64: 389 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 390 case LengthModifier::AsIntMax: 391 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 392 case LengthModifier::AsSizeT: 393 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 394 case LengthModifier::AsPtrDiff: 395 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 396 case LengthModifier::AsLongDouble: 397 return ArgType(); // FIXME: Is this a known extension? 398 case LengthModifier::AsAllocate: 399 case LengthModifier::AsMAllocate: 400 case LengthModifier::AsInt32: 401 case LengthModifier::AsInt3264: 402 case LengthModifier::AsWide: 403 case LengthModifier::AsShortLong: 404 return ArgType::Invalid(); 405 } 406 407 default: 408 break; 409 } 410 411 return ArgType(); 412 } 413 414 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT, 415 const LangOptions &LangOpt, 416 ASTContext &Ctx) { 417 418 // %n is different from other conversion specifiers; don't try to fix it. 419 if (CS.getKind() == ConversionSpecifier::nArg) 420 return false; 421 422 if (!QT->isPointerType()) 423 return false; 424 425 QualType PT = QT->getPointeeType(); 426 427 // If it's an enum, get its underlying type. 428 if (const EnumType *ETy = PT->getAs<EnumType>()) { 429 // Don't try to fix incomplete enums. 430 if (!ETy->getDecl()->isComplete()) 431 return false; 432 PT = ETy->getDecl()->getIntegerType(); 433 } 434 435 const BuiltinType *BT = PT->getAs<BuiltinType>(); 436 if (!BT) 437 return false; 438 439 // Pointer to a character. 440 if (PT->isAnyCharacterType()) { 441 CS.setKind(ConversionSpecifier::sArg); 442 if (PT->isWideCharType()) 443 LM.setKind(LengthModifier::AsWideChar); 444 else 445 LM.setKind(LengthModifier::None); 446 447 // If we know the target array length, we can use it as a field width. 448 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) { 449 if (CAT->getSizeModifier() == ArraySizeModifier::Normal) 450 FieldWidth = OptionalAmount(OptionalAmount::Constant, 451 CAT->getSize().getZExtValue() - 1, 452 "", 0, false); 453 454 } 455 return true; 456 } 457 458 // Figure out the length modifier. 459 switch (BT->getKind()) { 460 // no modifier 461 case BuiltinType::UInt: 462 case BuiltinType::Int: 463 case BuiltinType::Float: 464 LM.setKind(LengthModifier::None); 465 break; 466 467 // hh 468 case BuiltinType::Char_U: 469 case BuiltinType::UChar: 470 case BuiltinType::Char_S: 471 case BuiltinType::SChar: 472 LM.setKind(LengthModifier::AsChar); 473 break; 474 475 // h 476 case BuiltinType::Short: 477 case BuiltinType::UShort: 478 LM.setKind(LengthModifier::AsShort); 479 break; 480 481 // l 482 case BuiltinType::Long: 483 case BuiltinType::ULong: 484 case BuiltinType::Double: 485 LM.setKind(LengthModifier::AsLong); 486 break; 487 488 // ll 489 case BuiltinType::LongLong: 490 case BuiltinType::ULongLong: 491 LM.setKind(LengthModifier::AsLongLong); 492 break; 493 494 // L 495 case BuiltinType::LongDouble: 496 LM.setKind(LengthModifier::AsLongDouble); 497 break; 498 499 // Don't know. 500 default: 501 return false; 502 } 503 504 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 505 if (LangOpt.C99 || LangOpt.CPlusPlus11) 506 namedTypeToLengthModifier(PT, LM); 507 508 // If fixing the length modifier was enough, we are done. 509 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) { 510 const analyze_scanf::ArgType &AT = getArgType(Ctx); 511 if (AT.isValid() && AT.matchesType(Ctx, QT)) 512 return true; 513 } 514 515 // Figure out the conversion specifier. 516 if (PT->isRealFloatingType()) 517 CS.setKind(ConversionSpecifier::fArg); 518 else if (PT->isSignedIntegerType()) 519 CS.setKind(ConversionSpecifier::dArg); 520 else if (PT->isUnsignedIntegerType()) 521 CS.setKind(ConversionSpecifier::uArg); 522 else 523 llvm_unreachable("Unexpected type"); 524 525 return true; 526 } 527 528 void ScanfSpecifier::toString(raw_ostream &os) const { 529 os << "%"; 530 531 if (usesPositionalArg()) 532 os << getPositionalArgIndex() << "$"; 533 if (SuppressAssignment) 534 os << "*"; 535 536 FieldWidth.toString(os); 537 os << LM.toString(); 538 os << CS.toString(); 539 } 540 541 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 542 const char *I, 543 const char *E, 544 const LangOptions &LO, 545 const TargetInfo &Target) { 546 547 unsigned argIndex = 0; 548 549 // Keep looking for a format specifier until we have exhausted the string. 550 while (I != E) { 551 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 552 LO, Target); 553 // Did a fail-stop error of any kind occur when parsing the specifier? 554 // If so, don't do any more processing. 555 if (FSR.shouldStop()) 556 return true; 557 // Did we exhaust the string or encounter an error that 558 // we can recover from? 559 if (!FSR.hasValue()) 560 continue; 561 // We have a format specifier. Pass it to the callback. 562 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 563 I - FSR.getStart())) { 564 return true; 565 } 566 } 567 assert(I == E && "Format string not exhausted"); 568 return false; 569 } 570