1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handling of format string in printf and friends. The structure of format 10 // strings for fprintf() are described in C99 7.19.6.1. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "FormatStringParsing.h" 15 #include "clang/AST/FormatString.h" 16 #include "clang/AST/OSLog.h" 17 #include "clang/Basic/TargetInfo.h" 18 #include "llvm/Support/Regex.h" 19 20 using clang::analyze_format_string::ArgType; 21 using clang::analyze_format_string::FormatStringHandler; 22 using clang::analyze_format_string::LengthModifier; 23 using clang::analyze_format_string::OptionalAmount; 24 using clang::analyze_format_string::ConversionSpecifier; 25 using clang::analyze_printf::PrintfSpecifier; 26 27 using namespace clang; 28 29 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 30 PrintfSpecifierResult; 31 32 //===----------------------------------------------------------------------===// 33 // Methods for parsing format strings. 34 //===----------------------------------------------------------------------===// 35 36 using analyze_format_string::ParseNonPositionAmount; 37 38 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 39 const char *Start, const char *&Beg, const char *E, 40 unsigned *argIndex) { 41 if (argIndex) { 42 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 43 } else { 44 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 45 analyze_format_string::PrecisionPos); 46 if (Amt.isInvalid()) 47 return true; 48 FS.setPrecision(Amt); 49 } 50 return false; 51 } 52 53 static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, 54 const char *FlagBeg, const char *E, bool Warn) { 55 StringRef Flag(FlagBeg, E - FlagBeg); 56 // Currently there is only one flag. 57 if (Flag == "tt") { 58 FS.setHasObjCTechnicalTerm(FlagBeg); 59 return false; 60 } 61 // Handle either the case of no flag or an invalid flag. 62 if (Warn) { 63 if (Flag == "") 64 H.HandleEmptyObjCModifierFlag(FlagBeg, E - FlagBeg); 65 else 66 H.HandleInvalidObjCModifierFlag(FlagBeg, E - FlagBeg); 67 } 68 return true; 69 } 70 71 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 72 const char *&Beg, 73 const char *E, 74 unsigned &argIndex, 75 const LangOptions &LO, 76 const TargetInfo &Target, 77 bool Warn, 78 bool isFreeBSDKPrintf) { 79 80 using namespace clang::analyze_format_string; 81 using namespace clang::analyze_printf; 82 83 const char *I = Beg; 84 const char *Start = nullptr; 85 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 86 87 // Look for a '%' character that indicates the start of a format specifier. 88 for ( ; I != E ; ++I) { 89 char c = *I; 90 if (c == '\0') { 91 // Detect spurious null characters, which are likely errors. 92 H.HandleNullChar(I); 93 return true; 94 } 95 if (c == '%') { 96 Start = I++; // Record the start of the format specifier. 97 break; 98 } 99 } 100 101 // No format specifier found? 102 if (!Start) 103 return false; 104 105 if (I == E) { 106 // No more characters left? 107 if (Warn) 108 H.HandleIncompleteSpecifier(Start, E - Start); 109 return true; 110 } 111 112 PrintfSpecifier FS; 113 if (ParseArgPosition(H, FS, Start, I, E)) 114 return true; 115 116 if (I == E) { 117 // No more characters left? 118 if (Warn) 119 H.HandleIncompleteSpecifier(Start, E - Start); 120 return true; 121 } 122 123 if (*I == '{') { 124 ++I; 125 unsigned char PrivacyFlags = 0; 126 StringRef MatchedStr; 127 128 do { 129 StringRef Str(I, E - I); 130 std::string Match = "^[[:space:]]*" 131 "(private|public|sensitive|mask\\.[^[:space:],}]*)" 132 "[[:space:]]*(,|})"; 133 llvm::Regex R(Match); 134 SmallVector<StringRef, 2> Matches; 135 136 if (R.match(Str, &Matches)) { 137 MatchedStr = Matches[1]; 138 I += Matches[0].size(); 139 140 // Set the privacy flag if the privacy annotation in the 141 // comma-delimited segment is at least as strict as the privacy 142 // annotations in previous comma-delimited segments. 143 if (MatchedStr.starts_with("mask")) { 144 StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1); 145 unsigned Size = MaskType.size(); 146 if (Warn && (Size == 0 || Size > 8)) 147 H.handleInvalidMaskType(MaskType); 148 FS.setMaskType(MaskType); 149 } else if (MatchedStr == "sensitive") 150 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive; 151 else if (PrivacyFlags != 152 clang::analyze_os_log::OSLogBufferItem::IsSensitive && 153 MatchedStr == "private") 154 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate; 155 else if (PrivacyFlags == 0 && MatchedStr == "public") 156 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic; 157 } else { 158 size_t CommaOrBracePos = 159 Str.find_if([](char c) { return c == ',' || c == '}'; }); 160 161 if (CommaOrBracePos == StringRef::npos) { 162 // Neither a comma nor the closing brace was found. 163 if (Warn) 164 H.HandleIncompleteSpecifier(Start, E - Start); 165 return true; 166 } 167 168 I += CommaOrBracePos + 1; 169 } 170 // Continue until the closing brace is found. 171 } while (*(I - 1) == ','); 172 173 // Set the privacy flag. 174 switch (PrivacyFlags) { 175 case 0: 176 break; 177 case clang::analyze_os_log::OSLogBufferItem::IsPrivate: 178 FS.setIsPrivate(MatchedStr.data()); 179 break; 180 case clang::analyze_os_log::OSLogBufferItem::IsPublic: 181 FS.setIsPublic(MatchedStr.data()); 182 break; 183 case clang::analyze_os_log::OSLogBufferItem::IsSensitive: 184 FS.setIsSensitive(MatchedStr.data()); 185 break; 186 default: 187 llvm_unreachable("Unexpected privacy flag value"); 188 } 189 } 190 191 // Look for flags (if any). 192 bool hasMore = true; 193 for ( ; I != E; ++I) { 194 switch (*I) { 195 default: hasMore = false; break; 196 case '\'': 197 // FIXME: POSIX specific. Always accept? 198 FS.setHasThousandsGrouping(I); 199 break; 200 case '-': FS.setIsLeftJustified(I); break; 201 case '+': FS.setHasPlusPrefix(I); break; 202 case ' ': FS.setHasSpacePrefix(I); break; 203 case '#': FS.setHasAlternativeForm(I); break; 204 case '0': FS.setHasLeadingZeros(I); break; 205 } 206 if (!hasMore) 207 break; 208 } 209 210 if (I == E) { 211 // No more characters left? 212 if (Warn) 213 H.HandleIncompleteSpecifier(Start, E - Start); 214 return true; 215 } 216 217 // Look for the field width (if any). 218 if (ParseFieldWidth(H, FS, Start, I, E, 219 FS.usesPositionalArg() ? nullptr : &argIndex)) 220 return true; 221 222 if (I == E) { 223 // No more characters left? 224 if (Warn) 225 H.HandleIncompleteSpecifier(Start, E - Start); 226 return true; 227 } 228 229 // Look for the precision (if any). 230 if (*I == '.') { 231 ++I; 232 if (I == E) { 233 if (Warn) 234 H.HandleIncompleteSpecifier(Start, E - Start); 235 return true; 236 } 237 238 if (ParsePrecision(H, FS, Start, I, E, 239 FS.usesPositionalArg() ? nullptr : &argIndex)) 240 return true; 241 242 if (I == E) { 243 // No more characters left? 244 if (Warn) 245 H.HandleIncompleteSpecifier(Start, E - Start); 246 return true; 247 } 248 } 249 250 if (ParseVectorModifier(H, FS, I, E, LO)) 251 return true; 252 253 // Look for the length modifier. 254 if (ParseLengthModifier(FS, I, E, LO) && I == E) { 255 // No more characters left? 256 if (Warn) 257 H.HandleIncompleteSpecifier(Start, E - Start); 258 return true; 259 } 260 261 // Look for the Objective-C modifier flags, if any. 262 // We parse these here, even if they don't apply to 263 // the conversion specifier, and then emit an error 264 // later if the conversion specifier isn't '@'. This 265 // enables better recovery, and we don't know if 266 // these flags are applicable until later. 267 const char *ObjCModifierFlagsStart = nullptr, 268 *ObjCModifierFlagsEnd = nullptr; 269 if (*I == '[') { 270 ObjCModifierFlagsStart = I; 271 ++I; 272 auto flagStart = I; 273 for (;; ++I) { 274 ObjCModifierFlagsEnd = I; 275 if (I == E) { 276 if (Warn) 277 H.HandleIncompleteSpecifier(Start, E - Start); 278 return true; 279 } 280 // Did we find the closing ']'? 281 if (*I == ']') { 282 if (ParseObjCFlags(H, FS, flagStart, I, Warn)) 283 return true; 284 ++I; 285 break; 286 } 287 // There are no separators defined yet for multiple 288 // Objective-C modifier flags. When those are 289 // defined, this is the place to check. 290 } 291 } 292 293 if (*I == '\0') { 294 // Detect spurious null characters, which are likely errors. 295 H.HandleNullChar(I); 296 return true; 297 } 298 299 // Finally, look for the conversion specifier. 300 const char *conversionPosition = I++; 301 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 302 switch (*conversionPosition) { 303 default: 304 break; 305 // C99: 7.19.6.1 (section 8). 306 case '%': k = ConversionSpecifier::PercentArg; break; 307 case 'A': k = ConversionSpecifier::AArg; break; 308 case 'E': k = ConversionSpecifier::EArg; break; 309 case 'F': k = ConversionSpecifier::FArg; break; 310 case 'G': k = ConversionSpecifier::GArg; break; 311 case 'X': k = ConversionSpecifier::XArg; break; 312 case 'a': k = ConversionSpecifier::aArg; break; 313 case 'c': k = ConversionSpecifier::cArg; break; 314 case 'd': k = ConversionSpecifier::dArg; break; 315 case 'e': k = ConversionSpecifier::eArg; break; 316 case 'f': k = ConversionSpecifier::fArg; break; 317 case 'g': k = ConversionSpecifier::gArg; break; 318 case 'i': k = ConversionSpecifier::iArg; break; 319 case 'n': 320 // Not handled, but reserved in OpenCL and FreeBSD kernel. 321 if (!LO.OpenCL && !isFreeBSDKPrintf) 322 k = ConversionSpecifier::nArg; 323 break; 324 case 'o': k = ConversionSpecifier::oArg; break; 325 case 'p': k = ConversionSpecifier::pArg; break; 326 case 's': k = ConversionSpecifier::sArg; break; 327 case 'u': k = ConversionSpecifier::uArg; break; 328 case 'x': k = ConversionSpecifier::xArg; break; 329 // C23. 330 case 'b': 331 if (isFreeBSDKPrintf) 332 k = ConversionSpecifier::FreeBSDbArg; // int followed by char * 333 else 334 k = ConversionSpecifier::bArg; 335 break; 336 case 'B': k = ConversionSpecifier::BArg; break; 337 // POSIX specific. 338 case 'C': k = ConversionSpecifier::CArg; break; 339 case 'S': k = ConversionSpecifier::SArg; break; 340 // Apple extension for os_log 341 case 'P': 342 k = ConversionSpecifier::PArg; 343 break; 344 // Objective-C. 345 case '@': k = ConversionSpecifier::ObjCObjArg; break; 346 // Glibc specific. 347 case 'm': k = ConversionSpecifier::PrintErrno; break; 348 case 'r': 349 if (isFreeBSDKPrintf) 350 k = ConversionSpecifier::FreeBSDrArg; // int 351 else if (LO.FixedPoint) 352 k = ConversionSpecifier::rArg; 353 break; 354 case 'y': 355 if (isFreeBSDKPrintf) 356 k = ConversionSpecifier::FreeBSDyArg; // int 357 break; 358 // Apple-specific. 359 case 'D': 360 if (isFreeBSDKPrintf) 361 k = ConversionSpecifier::FreeBSDDArg; // void * followed by char * 362 else if (Target.getTriple().isOSDarwin()) 363 k = ConversionSpecifier::DArg; 364 break; 365 case 'O': 366 if (Target.getTriple().isOSDarwin()) 367 k = ConversionSpecifier::OArg; 368 break; 369 case 'U': 370 if (Target.getTriple().isOSDarwin()) 371 k = ConversionSpecifier::UArg; 372 break; 373 // MS specific. 374 case 'Z': 375 if (Target.getTriple().isOSMSVCRT()) 376 k = ConversionSpecifier::ZArg; 377 break; 378 // ISO/IEC TR 18037 (fixed-point) specific. 379 // NOTE: 'r' is handled up above since FreeBSD also supports %r. 380 case 'k': 381 if (LO.FixedPoint) 382 k = ConversionSpecifier::kArg; 383 break; 384 case 'K': 385 if (LO.FixedPoint) 386 k = ConversionSpecifier::KArg; 387 break; 388 case 'R': 389 if (LO.FixedPoint) 390 k = ConversionSpecifier::RArg; 391 break; 392 } 393 394 // Check to see if we used the Objective-C modifier flags with 395 // a conversion specifier other than '@'. 396 if (k != ConversionSpecifier::ObjCObjArg && 397 k != ConversionSpecifier::InvalidSpecifier && 398 ObjCModifierFlagsStart) { 399 H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart, 400 ObjCModifierFlagsEnd + 1, 401 conversionPosition); 402 return true; 403 } 404 405 PrintfConversionSpecifier CS(conversionPosition, k); 406 FS.setConversionSpecifier(CS); 407 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 408 FS.setArgIndex(argIndex++); 409 // FreeBSD kernel specific. 410 if (k == ConversionSpecifier::FreeBSDbArg || 411 k == ConversionSpecifier::FreeBSDDArg) 412 argIndex++; 413 414 if (k == ConversionSpecifier::InvalidSpecifier) { 415 unsigned Len = I - Start; 416 if (ParseUTF8InvalidSpecifier(Start, E, Len)) { 417 CS.setEndScanList(Start + Len); 418 FS.setConversionSpecifier(CS); 419 } 420 // Assume the conversion takes one argument. 421 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len); 422 } 423 return PrintfSpecifierResult(Start, FS); 424 } 425 426 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 427 const char *I, 428 const char *E, 429 const LangOptions &LO, 430 const TargetInfo &Target, 431 bool isFreeBSDKPrintf) { 432 433 unsigned argIndex = 0; 434 435 // Keep looking for a format specifier until we have exhausted the string. 436 while (I != E) { 437 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 438 LO, Target, true, 439 isFreeBSDKPrintf); 440 // Did a fail-stop error of any kind occur when parsing the specifier? 441 // If so, don't do any more processing. 442 if (FSR.shouldStop()) 443 return true; 444 // Did we exhaust the string or encounter an error that 445 // we can recover from? 446 if (!FSR.hasValue()) 447 continue; 448 // We have a format specifier. Pass it to the callback. 449 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 450 I - FSR.getStart(), Target)) 451 return true; 452 } 453 assert(I == E && "Format string not exhausted"); 454 return false; 455 } 456 457 bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I, 458 const char *E, 459 const LangOptions &LO, 460 const TargetInfo &Target) { 461 462 unsigned argIndex = 0; 463 464 // Keep looking for a %s format specifier until we have exhausted the string. 465 FormatStringHandler H; 466 while (I != E) { 467 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 468 LO, Target, false, 469 false); 470 // Did a fail-stop error of any kind occur when parsing the specifier? 471 // If so, don't do any more processing. 472 if (FSR.shouldStop()) 473 return false; 474 // Did we exhaust the string or encounter an error that 475 // we can recover from? 476 if (!FSR.hasValue()) 477 continue; 478 const analyze_printf::PrintfSpecifier &FS = FSR.getValue(); 479 // Return true if this a %s format specifier. 480 if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg) 481 return true; 482 } 483 return false; 484 } 485 486 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers( 487 const char *Begin, const char *End, const LangOptions &LO, 488 const TargetInfo &Target) { 489 unsigned ArgIndex = 0; 490 // Keep looking for a formatting specifier until we have exhausted the string. 491 FormatStringHandler H; 492 while (Begin != End) { 493 const PrintfSpecifierResult &FSR = 494 ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false); 495 if (FSR.shouldStop()) 496 break; 497 if (FSR.hasValue()) 498 return true; 499 } 500 return false; 501 } 502 503 //===----------------------------------------------------------------------===// 504 // Methods on PrintfSpecifier. 505 //===----------------------------------------------------------------------===// 506 507 ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx, 508 bool IsObjCLiteral) const { 509 if (CS.getKind() == ConversionSpecifier::cArg) 510 switch (LM.getKind()) { 511 case LengthModifier::None: 512 return Ctx.IntTy; 513 case LengthModifier::AsLong: 514 case LengthModifier::AsWide: 515 return ArgType(ArgType::WIntTy, "wint_t"); 516 case LengthModifier::AsShort: 517 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 518 return Ctx.IntTy; 519 [[fallthrough]]; 520 default: 521 return ArgType::Invalid(); 522 } 523 524 if (CS.isIntArg()) 525 switch (LM.getKind()) { 526 case LengthModifier::AsLongDouble: 527 // GNU extension. 528 return Ctx.LongLongTy; 529 case LengthModifier::None: 530 case LengthModifier::AsShortLong: 531 return Ctx.IntTy; 532 case LengthModifier::AsInt32: 533 return ArgType(Ctx.IntTy, "__int32"); 534 case LengthModifier::AsChar: 535 return ArgType::AnyCharTy; 536 case LengthModifier::AsShort: return Ctx.ShortTy; 537 case LengthModifier::AsLong: return Ctx.LongTy; 538 case LengthModifier::AsLongLong: 539 case LengthModifier::AsQuad: 540 return Ctx.LongLongTy; 541 case LengthModifier::AsInt64: 542 return ArgType(Ctx.LongLongTy, "__int64"); 543 case LengthModifier::AsIntMax: 544 return ArgType(Ctx.getIntMaxType(), "intmax_t"); 545 case LengthModifier::AsSizeT: 546 return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 547 case LengthModifier::AsInt3264: 548 return Ctx.getTargetInfo().getTriple().isArch64Bit() 549 ? ArgType(Ctx.LongLongTy, "__int64") 550 : ArgType(Ctx.IntTy, "__int32"); 551 case LengthModifier::AsPtrDiff: 552 return ArgType::makePtrdiffT( 553 ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 554 case LengthModifier::AsAllocate: 555 case LengthModifier::AsMAllocate: 556 case LengthModifier::AsWide: 557 return ArgType::Invalid(); 558 } 559 560 if (CS.isUIntArg()) 561 switch (LM.getKind()) { 562 case LengthModifier::AsLongDouble: 563 // GNU extension. 564 return Ctx.UnsignedLongLongTy; 565 case LengthModifier::None: 566 case LengthModifier::AsShortLong: 567 return Ctx.UnsignedIntTy; 568 case LengthModifier::AsInt32: 569 return ArgType(Ctx.UnsignedIntTy, "unsigned __int32"); 570 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 571 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 572 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 573 case LengthModifier::AsLongLong: 574 case LengthModifier::AsQuad: 575 return Ctx.UnsignedLongLongTy; 576 case LengthModifier::AsInt64: 577 return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"); 578 case LengthModifier::AsIntMax: 579 return ArgType(Ctx.getUIntMaxType(), "uintmax_t"); 580 case LengthModifier::AsSizeT: 581 return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t")); 582 case LengthModifier::AsInt3264: 583 return Ctx.getTargetInfo().getTriple().isArch64Bit() 584 ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64") 585 : ArgType(Ctx.UnsignedIntTy, "unsigned __int32"); 586 case LengthModifier::AsPtrDiff: 587 return ArgType::makePtrdiffT( 588 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); 589 case LengthModifier::AsAllocate: 590 case LengthModifier::AsMAllocate: 591 case LengthModifier::AsWide: 592 return ArgType::Invalid(); 593 } 594 595 if (CS.isDoubleArg()) { 596 if (!VectorNumElts.isInvalid()) { 597 switch (LM.getKind()) { 598 case LengthModifier::AsShort: 599 return Ctx.HalfTy; 600 case LengthModifier::AsShortLong: 601 return Ctx.FloatTy; 602 case LengthModifier::AsLong: 603 default: 604 return Ctx.DoubleTy; 605 } 606 } 607 608 if (LM.getKind() == LengthModifier::AsLongDouble) 609 return Ctx.LongDoubleTy; 610 return Ctx.DoubleTy; 611 } 612 613 if (CS.getKind() == ConversionSpecifier::nArg) { 614 switch (LM.getKind()) { 615 case LengthModifier::None: 616 return ArgType::PtrTo(Ctx.IntTy); 617 case LengthModifier::AsChar: 618 return ArgType::PtrTo(Ctx.SignedCharTy); 619 case LengthModifier::AsShort: 620 return ArgType::PtrTo(Ctx.ShortTy); 621 case LengthModifier::AsLong: 622 return ArgType::PtrTo(Ctx.LongTy); 623 case LengthModifier::AsLongLong: 624 case LengthModifier::AsQuad: 625 return ArgType::PtrTo(Ctx.LongLongTy); 626 case LengthModifier::AsIntMax: 627 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 628 case LengthModifier::AsSizeT: 629 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 630 case LengthModifier::AsPtrDiff: 631 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 632 case LengthModifier::AsLongDouble: 633 return ArgType(); // FIXME: Is this a known extension? 634 case LengthModifier::AsAllocate: 635 case LengthModifier::AsMAllocate: 636 case LengthModifier::AsInt32: 637 case LengthModifier::AsInt3264: 638 case LengthModifier::AsInt64: 639 case LengthModifier::AsWide: 640 return ArgType::Invalid(); 641 case LengthModifier::AsShortLong: 642 llvm_unreachable("only used for OpenCL which doesn not handle nArg"); 643 } 644 } 645 646 if (CS.isFixedPointArg() && !Ctx.getLangOpts().FixedPoint) 647 return ArgType::Invalid(); 648 649 switch (CS.getKind()) { 650 case ConversionSpecifier::sArg: 651 if (LM.getKind() == LengthModifier::AsWideChar) { 652 if (IsObjCLiteral) 653 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()), 654 "const unichar *"); 655 return ArgType(ArgType::WCStrTy, "wchar_t *"); 656 } 657 if (LM.getKind() == LengthModifier::AsWide) 658 return ArgType(ArgType::WCStrTy, "wchar_t *"); 659 return ArgType::CStrTy; 660 case ConversionSpecifier::SArg: 661 if (IsObjCLiteral) 662 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()), 663 "const unichar *"); 664 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() && 665 LM.getKind() == LengthModifier::AsShort) 666 return ArgType::CStrTy; 667 return ArgType(ArgType::WCStrTy, "wchar_t *"); 668 case ConversionSpecifier::CArg: 669 if (IsObjCLiteral) 670 return ArgType(Ctx.UnsignedShortTy, "unichar"); 671 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() && 672 LM.getKind() == LengthModifier::AsShort) 673 return Ctx.IntTy; 674 return ArgType(Ctx.WideCharTy, "wchar_t"); 675 case ConversionSpecifier::pArg: 676 case ConversionSpecifier::PArg: 677 return ArgType::CPointerTy; 678 case ConversionSpecifier::ObjCObjArg: 679 return ArgType::ObjCPointerTy; 680 case ConversionSpecifier::kArg: 681 switch (LM.getKind()) { 682 case LengthModifier::None: 683 return Ctx.AccumTy; 684 case LengthModifier::AsShort: 685 return Ctx.ShortAccumTy; 686 case LengthModifier::AsLong: 687 return Ctx.LongAccumTy; 688 default: 689 return ArgType::Invalid(); 690 } 691 case ConversionSpecifier::KArg: 692 switch (LM.getKind()) { 693 case LengthModifier::None: 694 return Ctx.UnsignedAccumTy; 695 case LengthModifier::AsShort: 696 return Ctx.UnsignedShortAccumTy; 697 case LengthModifier::AsLong: 698 return Ctx.UnsignedLongAccumTy; 699 default: 700 return ArgType::Invalid(); 701 } 702 case ConversionSpecifier::rArg: 703 switch (LM.getKind()) { 704 case LengthModifier::None: 705 return Ctx.FractTy; 706 case LengthModifier::AsShort: 707 return Ctx.ShortFractTy; 708 case LengthModifier::AsLong: 709 return Ctx.LongFractTy; 710 default: 711 return ArgType::Invalid(); 712 } 713 case ConversionSpecifier::RArg: 714 switch (LM.getKind()) { 715 case LengthModifier::None: 716 return Ctx.UnsignedFractTy; 717 case LengthModifier::AsShort: 718 return Ctx.UnsignedShortFractTy; 719 case LengthModifier::AsLong: 720 return Ctx.UnsignedLongFractTy; 721 default: 722 return ArgType::Invalid(); 723 } 724 default: 725 break; 726 } 727 728 // FIXME: Handle other cases. 729 return ArgType(); 730 } 731 732 733 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx, 734 bool IsObjCLiteral) const { 735 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 736 737 if (!CS.consumesDataArgument()) 738 return ArgType::Invalid(); 739 740 ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral); 741 if (!ScalarTy.isValid() || VectorNumElts.isInvalid()) 742 return ScalarTy; 743 744 return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount()); 745 } 746 747 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 748 ASTContext &Ctx, bool IsObjCLiteral) { 749 // %n is different from other conversion specifiers; don't try to fix it. 750 if (CS.getKind() == ConversionSpecifier::nArg) 751 return false; 752 753 // Handle Objective-C objects first. Note that while the '%@' specifier will 754 // not warn for structure pointer or void pointer arguments (because that's 755 // how CoreFoundation objects are implemented), we only show a fixit for '%@' 756 // if we know it's an object (block, id, class, or __attribute__((NSObject))). 757 if (QT->isObjCRetainableType()) { 758 if (!IsObjCLiteral) 759 return false; 760 761 CS.setKind(ConversionSpecifier::ObjCObjArg); 762 763 // Disable irrelevant flags 764 HasThousandsGrouping = false; 765 HasPlusPrefix = false; 766 HasSpacePrefix = false; 767 HasAlternativeForm = false; 768 HasLeadingZeroes = false; 769 Precision.setHowSpecified(OptionalAmount::NotSpecified); 770 LM.setKind(LengthModifier::None); 771 772 return true; 773 } 774 775 // Handle strings next (char *, wchar_t *) 776 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 777 CS.setKind(ConversionSpecifier::sArg); 778 779 // Disable irrelevant flags 780 HasAlternativeForm = false; 781 HasLeadingZeroes = false; 782 783 // Set the long length modifier for wide characters 784 if (QT->getPointeeType()->isWideCharType()) 785 LM.setKind(LengthModifier::AsWideChar); 786 else 787 LM.setKind(LengthModifier::None); 788 789 return true; 790 } 791 792 // If it's an enum, get its underlying type. 793 if (const EnumType *ETy = QT->getAs<EnumType>()) 794 QT = ETy->getDecl()->getIntegerType(); 795 796 const BuiltinType *BT = QT->getAs<BuiltinType>(); 797 if (!BT) { 798 const VectorType *VT = QT->getAs<VectorType>(); 799 if (VT) { 800 QT = VT->getElementType(); 801 BT = QT->getAs<BuiltinType>(); 802 VectorNumElts = OptionalAmount(VT->getNumElements()); 803 } 804 } 805 806 // We can only work with builtin types. 807 if (!BT) 808 return false; 809 810 // Set length modifier 811 switch (BT->getKind()) { 812 case BuiltinType::Bool: 813 case BuiltinType::WChar_U: 814 case BuiltinType::WChar_S: 815 case BuiltinType::Char8: // FIXME: Treat like 'char'? 816 case BuiltinType::Char16: 817 case BuiltinType::Char32: 818 case BuiltinType::UInt128: 819 case BuiltinType::Int128: 820 case BuiltinType::Half: 821 case BuiltinType::BFloat16: 822 case BuiltinType::Float16: 823 case BuiltinType::Float128: 824 case BuiltinType::Ibm128: 825 case BuiltinType::ShortAccum: 826 case BuiltinType::Accum: 827 case BuiltinType::LongAccum: 828 case BuiltinType::UShortAccum: 829 case BuiltinType::UAccum: 830 case BuiltinType::ULongAccum: 831 case BuiltinType::ShortFract: 832 case BuiltinType::Fract: 833 case BuiltinType::LongFract: 834 case BuiltinType::UShortFract: 835 case BuiltinType::UFract: 836 case BuiltinType::ULongFract: 837 case BuiltinType::SatShortAccum: 838 case BuiltinType::SatAccum: 839 case BuiltinType::SatLongAccum: 840 case BuiltinType::SatUShortAccum: 841 case BuiltinType::SatUAccum: 842 case BuiltinType::SatULongAccum: 843 case BuiltinType::SatShortFract: 844 case BuiltinType::SatFract: 845 case BuiltinType::SatLongFract: 846 case BuiltinType::SatUShortFract: 847 case BuiltinType::SatUFract: 848 case BuiltinType::SatULongFract: 849 // Various types which are non-trivial to correct. 850 return false; 851 852 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ 853 case BuiltinType::Id: 854 #include "clang/Basic/OpenCLImageTypes.def" 855 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ 856 case BuiltinType::Id: 857 #include "clang/Basic/OpenCLExtensionTypes.def" 858 #define SVE_TYPE(Name, Id, SingletonId) \ 859 case BuiltinType::Id: 860 #include "clang/Basic/AArch64SVEACLETypes.def" 861 #define PPC_VECTOR_TYPE(Name, Id, Size) \ 862 case BuiltinType::Id: 863 #include "clang/Basic/PPCTypes.def" 864 #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: 865 #include "clang/Basic/RISCVVTypes.def" 866 #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: 867 #include "clang/Basic/WebAssemblyReferenceTypes.def" 868 #define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: 869 #include "clang/Basic/AMDGPUTypes.def" 870 #define SIGNED_TYPE(Id, SingletonId) 871 #define UNSIGNED_TYPE(Id, SingletonId) 872 #define FLOATING_TYPE(Id, SingletonId) 873 #define BUILTIN_TYPE(Id, SingletonId) \ 874 case BuiltinType::Id: 875 #include "clang/AST/BuiltinTypes.def" 876 // Misc other stuff which doesn't make sense here. 877 return false; 878 879 case BuiltinType::UInt: 880 case BuiltinType::Int: 881 case BuiltinType::Float: 882 LM.setKind(VectorNumElts.isInvalid() ? 883 LengthModifier::None : LengthModifier::AsShortLong); 884 break; 885 case BuiltinType::Double: 886 LM.setKind(VectorNumElts.isInvalid() ? 887 LengthModifier::None : LengthModifier::AsLong); 888 break; 889 case BuiltinType::Char_U: 890 case BuiltinType::UChar: 891 case BuiltinType::Char_S: 892 case BuiltinType::SChar: 893 LM.setKind(LengthModifier::AsChar); 894 break; 895 896 case BuiltinType::Short: 897 case BuiltinType::UShort: 898 LM.setKind(LengthModifier::AsShort); 899 break; 900 901 case BuiltinType::Long: 902 case BuiltinType::ULong: 903 LM.setKind(LengthModifier::AsLong); 904 break; 905 906 case BuiltinType::LongLong: 907 case BuiltinType::ULongLong: 908 LM.setKind(LengthModifier::AsLongLong); 909 break; 910 911 case BuiltinType::LongDouble: 912 LM.setKind(LengthModifier::AsLongDouble); 913 break; 914 } 915 916 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 917 if (LangOpt.C99 || LangOpt.CPlusPlus11) 918 namedTypeToLengthModifier(QT, LM); 919 920 // If fixing the length modifier was enough, we might be done. 921 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) { 922 // If we're going to offer a fix anyway, make sure the sign matches. 923 switch (CS.getKind()) { 924 case ConversionSpecifier::uArg: 925 case ConversionSpecifier::UArg: 926 if (QT->isSignedIntegerType()) 927 CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg); 928 break; 929 case ConversionSpecifier::dArg: 930 case ConversionSpecifier::DArg: 931 case ConversionSpecifier::iArg: 932 if (QT->isUnsignedIntegerType() && !HasPlusPrefix) 933 CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg); 934 break; 935 default: 936 // Other specifiers do not have signed/unsigned variants. 937 break; 938 } 939 940 const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral); 941 if (ATR.isValid() && ATR.matchesType(Ctx, QT)) 942 return true; 943 } 944 945 // Set conversion specifier and disable any flags which do not apply to it. 946 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 947 if (!QT->getAs<TypedefType>() && QT->isCharType()) { 948 CS.setKind(ConversionSpecifier::cArg); 949 LM.setKind(LengthModifier::None); 950 Precision.setHowSpecified(OptionalAmount::NotSpecified); 951 HasAlternativeForm = false; 952 HasLeadingZeroes = false; 953 HasPlusPrefix = false; 954 } 955 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 956 else if (QT->isRealFloatingType()) { 957 CS.setKind(ConversionSpecifier::fArg); 958 } else if (QT->isSignedIntegerType()) { 959 CS.setKind(ConversionSpecifier::dArg); 960 HasAlternativeForm = false; 961 } else if (QT->isUnsignedIntegerType()) { 962 CS.setKind(ConversionSpecifier::uArg); 963 HasAlternativeForm = false; 964 HasPlusPrefix = false; 965 } else { 966 llvm_unreachable("Unexpected type"); 967 } 968 969 return true; 970 } 971 972 void PrintfSpecifier::toString(raw_ostream &os) const { 973 // Whilst some features have no defined order, we are using the order 974 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 975 os << "%"; 976 977 // Positional args 978 if (usesPositionalArg()) { 979 os << getPositionalArgIndex() << "$"; 980 } 981 982 // Conversion flags 983 if (IsLeftJustified) os << "-"; 984 if (HasPlusPrefix) os << "+"; 985 if (HasSpacePrefix) os << " "; 986 if (HasAlternativeForm) os << "#"; 987 if (HasLeadingZeroes) os << "0"; 988 989 // Minimum field width 990 FieldWidth.toString(os); 991 // Precision 992 Precision.toString(os); 993 994 // Vector modifier 995 if (!VectorNumElts.isInvalid()) 996 os << 'v' << VectorNumElts.getConstantAmount(); 997 998 // Length modifier 999 os << LM.toString(); 1000 // Conversion specifier 1001 os << CS.toString(); 1002 } 1003 1004 bool PrintfSpecifier::hasValidPlusPrefix() const { 1005 if (!HasPlusPrefix) 1006 return true; 1007 1008 // The plus prefix only makes sense for signed conversions 1009 switch (CS.getKind()) { 1010 case ConversionSpecifier::dArg: 1011 case ConversionSpecifier::DArg: 1012 case ConversionSpecifier::iArg: 1013 case ConversionSpecifier::fArg: 1014 case ConversionSpecifier::FArg: 1015 case ConversionSpecifier::eArg: 1016 case ConversionSpecifier::EArg: 1017 case ConversionSpecifier::gArg: 1018 case ConversionSpecifier::GArg: 1019 case ConversionSpecifier::aArg: 1020 case ConversionSpecifier::AArg: 1021 case ConversionSpecifier::FreeBSDrArg: 1022 case ConversionSpecifier::FreeBSDyArg: 1023 case ConversionSpecifier::rArg: 1024 case ConversionSpecifier::kArg: 1025 return true; 1026 1027 default: 1028 return false; 1029 } 1030 } 1031 1032 bool PrintfSpecifier::hasValidAlternativeForm() const { 1033 if (!HasAlternativeForm) 1034 return true; 1035 1036 // Alternate form flag only valid with the bBoxXaAeEfFgGrRkK conversions 1037 switch (CS.getKind()) { 1038 case ConversionSpecifier::bArg: 1039 case ConversionSpecifier::BArg: 1040 case ConversionSpecifier::oArg: 1041 case ConversionSpecifier::OArg: 1042 case ConversionSpecifier::xArg: 1043 case ConversionSpecifier::XArg: 1044 case ConversionSpecifier::aArg: 1045 case ConversionSpecifier::AArg: 1046 case ConversionSpecifier::eArg: 1047 case ConversionSpecifier::EArg: 1048 case ConversionSpecifier::fArg: 1049 case ConversionSpecifier::FArg: 1050 case ConversionSpecifier::gArg: 1051 case ConversionSpecifier::GArg: 1052 case ConversionSpecifier::FreeBSDrArg: 1053 case ConversionSpecifier::FreeBSDyArg: 1054 case ConversionSpecifier::rArg: 1055 case ConversionSpecifier::RArg: 1056 case ConversionSpecifier::kArg: 1057 case ConversionSpecifier::KArg: 1058 return true; 1059 1060 default: 1061 return false; 1062 } 1063 } 1064 1065 bool PrintfSpecifier::hasValidLeadingZeros() const { 1066 if (!HasLeadingZeroes) 1067 return true; 1068 1069 // Leading zeroes flag only valid with the bBdiouxXaAeEfFgGrRkK conversions 1070 switch (CS.getKind()) { 1071 case ConversionSpecifier::bArg: 1072 case ConversionSpecifier::BArg: 1073 case ConversionSpecifier::dArg: 1074 case ConversionSpecifier::DArg: 1075 case ConversionSpecifier::iArg: 1076 case ConversionSpecifier::oArg: 1077 case ConversionSpecifier::OArg: 1078 case ConversionSpecifier::uArg: 1079 case ConversionSpecifier::UArg: 1080 case ConversionSpecifier::xArg: 1081 case ConversionSpecifier::XArg: 1082 case ConversionSpecifier::aArg: 1083 case ConversionSpecifier::AArg: 1084 case ConversionSpecifier::eArg: 1085 case ConversionSpecifier::EArg: 1086 case ConversionSpecifier::fArg: 1087 case ConversionSpecifier::FArg: 1088 case ConversionSpecifier::gArg: 1089 case ConversionSpecifier::GArg: 1090 case ConversionSpecifier::FreeBSDrArg: 1091 case ConversionSpecifier::FreeBSDyArg: 1092 case ConversionSpecifier::rArg: 1093 case ConversionSpecifier::RArg: 1094 case ConversionSpecifier::kArg: 1095 case ConversionSpecifier::KArg: 1096 return true; 1097 1098 default: 1099 return false; 1100 } 1101 } 1102 1103 bool PrintfSpecifier::hasValidSpacePrefix() const { 1104 if (!HasSpacePrefix) 1105 return true; 1106 1107 // The space prefix only makes sense for signed conversions 1108 switch (CS.getKind()) { 1109 case ConversionSpecifier::dArg: 1110 case ConversionSpecifier::DArg: 1111 case ConversionSpecifier::iArg: 1112 case ConversionSpecifier::fArg: 1113 case ConversionSpecifier::FArg: 1114 case ConversionSpecifier::eArg: 1115 case ConversionSpecifier::EArg: 1116 case ConversionSpecifier::gArg: 1117 case ConversionSpecifier::GArg: 1118 case ConversionSpecifier::aArg: 1119 case ConversionSpecifier::AArg: 1120 case ConversionSpecifier::FreeBSDrArg: 1121 case ConversionSpecifier::FreeBSDyArg: 1122 case ConversionSpecifier::rArg: 1123 case ConversionSpecifier::kArg: 1124 return true; 1125 1126 default: 1127 return false; 1128 } 1129 } 1130 1131 bool PrintfSpecifier::hasValidLeftJustified() const { 1132 if (!IsLeftJustified) 1133 return true; 1134 1135 // The left justified flag is valid for all conversions except n 1136 switch (CS.getKind()) { 1137 case ConversionSpecifier::nArg: 1138 return false; 1139 1140 default: 1141 return true; 1142 } 1143 } 1144 1145 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 1146 if (!HasThousandsGrouping) 1147 return true; 1148 1149 switch (CS.getKind()) { 1150 case ConversionSpecifier::dArg: 1151 case ConversionSpecifier::DArg: 1152 case ConversionSpecifier::iArg: 1153 case ConversionSpecifier::uArg: 1154 case ConversionSpecifier::UArg: 1155 case ConversionSpecifier::fArg: 1156 case ConversionSpecifier::FArg: 1157 case ConversionSpecifier::gArg: 1158 case ConversionSpecifier::GArg: 1159 return true; 1160 default: 1161 return false; 1162 } 1163 } 1164 1165 bool PrintfSpecifier::hasValidPrecision() const { 1166 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 1167 return true; 1168 1169 // Precision is only valid with the bBdiouxXaAeEfFgGsPrRkK conversions 1170 switch (CS.getKind()) { 1171 case ConversionSpecifier::bArg: 1172 case ConversionSpecifier::BArg: 1173 case ConversionSpecifier::dArg: 1174 case ConversionSpecifier::DArg: 1175 case ConversionSpecifier::iArg: 1176 case ConversionSpecifier::oArg: 1177 case ConversionSpecifier::OArg: 1178 case ConversionSpecifier::uArg: 1179 case ConversionSpecifier::UArg: 1180 case ConversionSpecifier::xArg: 1181 case ConversionSpecifier::XArg: 1182 case ConversionSpecifier::aArg: 1183 case ConversionSpecifier::AArg: 1184 case ConversionSpecifier::eArg: 1185 case ConversionSpecifier::EArg: 1186 case ConversionSpecifier::fArg: 1187 case ConversionSpecifier::FArg: 1188 case ConversionSpecifier::gArg: 1189 case ConversionSpecifier::GArg: 1190 case ConversionSpecifier::sArg: 1191 case ConversionSpecifier::FreeBSDrArg: 1192 case ConversionSpecifier::FreeBSDyArg: 1193 case ConversionSpecifier::PArg: 1194 case ConversionSpecifier::rArg: 1195 case ConversionSpecifier::RArg: 1196 case ConversionSpecifier::kArg: 1197 case ConversionSpecifier::KArg: 1198 return true; 1199 1200 default: 1201 return false; 1202 } 1203 } 1204 bool PrintfSpecifier::hasValidFieldWidth() const { 1205 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 1206 return true; 1207 1208 // The field width is valid for all conversions except n 1209 switch (CS.getKind()) { 1210 case ConversionSpecifier::nArg: 1211 return false; 1212 1213 default: 1214 return true; 1215 } 1216 } 1217