1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handling of format string in printf and friends. The structure of format 10 // strings for fprintf() are described in C99 7.19.6.1. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "FormatStringParsing.h" 15 #include "clang/AST/FormatString.h" 16 #include "clang/AST/OSLog.h" 17 #include "clang/Basic/TargetInfo.h" 18 #include "llvm/Support/Regex.h" 19 20 using clang::analyze_format_string::ArgType; 21 using clang::analyze_format_string::FormatStringHandler; 22 using clang::analyze_format_string::LengthModifier; 23 using clang::analyze_format_string::OptionalAmount; 24 using clang::analyze_format_string::ConversionSpecifier; 25 using clang::analyze_printf::PrintfSpecifier; 26 27 using namespace clang; 28 29 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 30 PrintfSpecifierResult; 31 32 //===----------------------------------------------------------------------===// 33 // Methods for parsing format strings. 34 //===----------------------------------------------------------------------===// 35 36 using analyze_format_string::ParseNonPositionAmount; 37 38 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 39 const char *Start, const char *&Beg, const char *E, 40 unsigned *argIndex) { 41 if (argIndex) { 42 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 43 } else { 44 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 45 analyze_format_string::PrecisionPos); 46 if (Amt.isInvalid()) 47 return true; 48 FS.setPrecision(Amt); 49 } 50 return false; 51 } 52 53 static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, 54 const char *FlagBeg, const char *E, bool Warn) { 55 StringRef Flag(FlagBeg, E - FlagBeg); 56 // Currently there is only one flag. 57 if (Flag == "tt") { 58 FS.setHasObjCTechnicalTerm(FlagBeg); 59 return false; 60 } 61 // Handle either the case of no flag or an invalid flag. 62 if (Warn) { 63 if (Flag == "") 64 H.HandleEmptyObjCModifierFlag(FlagBeg, E - FlagBeg); 65 else 66 H.HandleInvalidObjCModifierFlag(FlagBeg, E - FlagBeg); 67 } 68 return true; 69 } 70 71 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 72 const char *&Beg, 73 const char *E, 74 unsigned &argIndex, 75 const LangOptions &LO, 76 const TargetInfo &Target, 77 bool Warn, 78 bool isFreeBSDKPrintf) { 79 80 using namespace clang::analyze_format_string; 81 using namespace clang::analyze_printf; 82 83 const char *I = Beg; 84 const char *Start = nullptr; 85 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 86 87 // Look for a '%' character that indicates the start of a format specifier. 88 for ( ; I != E ; ++I) { 89 char c = *I; 90 if (c == '\0') { 91 // Detect spurious null characters, which are likely errors. 92 H.HandleNullChar(I); 93 return true; 94 } 95 if (c == '%') { 96 Start = I++; // Record the start of the format specifier. 97 break; 98 } 99 } 100 101 // No format specifier found? 102 if (!Start) 103 return false; 104 105 if (I == E) { 106 // No more characters left? 107 if (Warn) 108 H.HandleIncompleteSpecifier(Start, E - Start); 109 return true; 110 } 111 112 PrintfSpecifier FS; 113 if (ParseArgPosition(H, FS, Start, I, E)) 114 return true; 115 116 if (I == E) { 117 // No more characters left? 118 if (Warn) 119 H.HandleIncompleteSpecifier(Start, E - Start); 120 return true; 121 } 122 123 if (*I == '{') { 124 ++I; 125 unsigned char PrivacyFlags = 0; 126 StringRef MatchedStr; 127 128 do { 129 StringRef Str(I, E - I); 130 std::string Match = "^[[:space:]]*" 131 "(private|public|sensitive|mask\\.[^[:space:],}]*)" 132 "[[:space:]]*(,|})"; 133 llvm::Regex R(Match); 134 SmallVector<StringRef, 2> Matches; 135 136 if (R.match(Str, &Matches)) { 137 MatchedStr = Matches[1]; 138 I += Matches[0].size(); 139 140 // Set the privacy flag if the privacy annotation in the 141 // comma-delimited segment is at least as strict as the privacy 142 // annotations in previous comma-delimited segments. 143 if (MatchedStr.starts_with("mask")) { 144 StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1); 145 unsigned Size = MaskType.size(); 146 if (Warn && (Size == 0 || Size > 8)) 147 H.handleInvalidMaskType(MaskType); 148 FS.setMaskType(MaskType); 149 } else if (MatchedStr.equals("sensitive")) 150 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive; 151 else if (PrivacyFlags != 152 clang::analyze_os_log::OSLogBufferItem::IsSensitive && 153 MatchedStr.equals("private")) 154 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate; 155 else if (PrivacyFlags == 0 && MatchedStr.equals("public")) 156 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic; 157 } else { 158 size_t CommaOrBracePos = 159 Str.find_if([](char c) { return c == ',' || c == '}'; }); 160 161 if (CommaOrBracePos == StringRef::npos) { 162 // Neither a comma nor the closing brace was found. 163 if (Warn) 164 H.HandleIncompleteSpecifier(Start, E - Start); 165 return true; 166 } 167 168 I += CommaOrBracePos + 1; 169 } 170 // Continue until the closing brace is found. 171 } while (*(I - 1) == ','); 172 173 // Set the privacy flag. 174 switch (PrivacyFlags) { 175 case 0: 176 break; 177 case clang::analyze_os_log::OSLogBufferItem::IsPrivate: 178 FS.setIsPrivate(MatchedStr.data()); 179 break; 180 case clang::analyze_os_log::OSLogBufferItem::IsPublic: 181 FS.setIsPublic(MatchedStr.data()); 182 break; 183 case clang::analyze_os_log::OSLogBufferItem::IsSensitive: 184 FS.setIsSensitive(MatchedStr.data()); 185 break; 186 default: 187 llvm_unreachable("Unexpected privacy flag value"); 188 } 189 } 190 191 // Look for flags (if any). 192 bool hasMore = true; 193 for ( ; I != E; ++I) { 194 switch (*I) { 195 default: hasMore = false; break; 196 case '\'': 197 // FIXME: POSIX specific. Always accept? 198 FS.setHasThousandsGrouping(I); 199 break; 200 case '-': FS.setIsLeftJustified(I); break; 201 case '+': FS.setHasPlusPrefix(I); break; 202 case ' ': FS.setHasSpacePrefix(I); break; 203 case '#': FS.setHasAlternativeForm(I); break; 204 case '0': FS.setHasLeadingZeros(I); break; 205 } 206 if (!hasMore) 207 break; 208 } 209 210 if (I == E) { 211 // No more characters left? 212 if (Warn) 213 H.HandleIncompleteSpecifier(Start, E - Start); 214 return true; 215 } 216 217 // Look for the field width (if any). 218 if (ParseFieldWidth(H, FS, Start, I, E, 219 FS.usesPositionalArg() ? nullptr : &argIndex)) 220 return true; 221 222 if (I == E) { 223 // No more characters left? 224 if (Warn) 225 H.HandleIncompleteSpecifier(Start, E - Start); 226 return true; 227 } 228 229 // Look for the precision (if any). 230 if (*I == '.') { 231 ++I; 232 if (I == E) { 233 if (Warn) 234 H.HandleIncompleteSpecifier(Start, E - Start); 235 return true; 236 } 237 238 if (ParsePrecision(H, FS, Start, I, E, 239 FS.usesPositionalArg() ? nullptr : &argIndex)) 240 return true; 241 242 if (I == E) { 243 // No more characters left? 244 if (Warn) 245 H.HandleIncompleteSpecifier(Start, E - Start); 246 return true; 247 } 248 } 249 250 if (ParseVectorModifier(H, FS, I, E, LO)) 251 return true; 252 253 // Look for the length modifier. 254 if (ParseLengthModifier(FS, I, E, LO) && I == E) { 255 // No more characters left? 256 if (Warn) 257 H.HandleIncompleteSpecifier(Start, E - Start); 258 return true; 259 } 260 261 // Look for the Objective-C modifier flags, if any. 262 // We parse these here, even if they don't apply to 263 // the conversion specifier, and then emit an error 264 // later if the conversion specifier isn't '@'. This 265 // enables better recovery, and we don't know if 266 // these flags are applicable until later. 267 const char *ObjCModifierFlagsStart = nullptr, 268 *ObjCModifierFlagsEnd = nullptr; 269 if (*I == '[') { 270 ObjCModifierFlagsStart = I; 271 ++I; 272 auto flagStart = I; 273 for (;; ++I) { 274 ObjCModifierFlagsEnd = I; 275 if (I == E) { 276 if (Warn) 277 H.HandleIncompleteSpecifier(Start, E - Start); 278 return true; 279 } 280 // Did we find the closing ']'? 281 if (*I == ']') { 282 if (ParseObjCFlags(H, FS, flagStart, I, Warn)) 283 return true; 284 ++I; 285 break; 286 } 287 // There are no separators defined yet for multiple 288 // Objective-C modifier flags. When those are 289 // defined, this is the place to check. 290 } 291 } 292 293 if (*I == '\0') { 294 // Detect spurious null characters, which are likely errors. 295 H.HandleNullChar(I); 296 return true; 297 } 298 299 // Finally, look for the conversion specifier. 300 const char *conversionPosition = I++; 301 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 302 switch (*conversionPosition) { 303 default: 304 break; 305 // C99: 7.19.6.1 (section 8). 306 case '%': k = ConversionSpecifier::PercentArg; break; 307 case 'A': k = ConversionSpecifier::AArg; break; 308 case 'E': k = ConversionSpecifier::EArg; break; 309 case 'F': k = ConversionSpecifier::FArg; break; 310 case 'G': k = ConversionSpecifier::GArg; break; 311 case 'X': k = ConversionSpecifier::XArg; break; 312 case 'a': k = ConversionSpecifier::aArg; break; 313 case 'c': k = ConversionSpecifier::cArg; break; 314 case 'd': k = ConversionSpecifier::dArg; break; 315 case 'e': k = ConversionSpecifier::eArg; break; 316 case 'f': k = ConversionSpecifier::fArg; break; 317 case 'g': k = ConversionSpecifier::gArg; break; 318 case 'i': k = ConversionSpecifier::iArg; break; 319 case 'n': 320 // Not handled, but reserved in OpenCL and FreeBSD kernel. 321 if (!LO.OpenCL && !isFreeBSDKPrintf) 322 k = ConversionSpecifier::nArg; 323 break; 324 case 'o': k = ConversionSpecifier::oArg; break; 325 case 'p': k = ConversionSpecifier::pArg; break; 326 case 's': k = ConversionSpecifier::sArg; break; 327 case 'u': k = ConversionSpecifier::uArg; break; 328 case 'x': k = ConversionSpecifier::xArg; break; 329 // C23. 330 case 'b': 331 if (isFreeBSDKPrintf) 332 k = ConversionSpecifier::FreeBSDbArg; // int followed by char * 333 else 334 k = ConversionSpecifier::bArg; 335 break; 336 case 'B': k = ConversionSpecifier::BArg; break; 337 // POSIX specific. 338 case 'C': k = ConversionSpecifier::CArg; break; 339 case 'S': k = ConversionSpecifier::SArg; break; 340 // Apple extension for os_log 341 case 'P': 342 k = ConversionSpecifier::PArg; 343 break; 344 // Objective-C. 345 case '@': k = ConversionSpecifier::ObjCObjArg; break; 346 // Glibc specific. 347 case 'm': k = ConversionSpecifier::PrintErrno; break; 348 case 'r': 349 if (isFreeBSDKPrintf) 350 k = ConversionSpecifier::FreeBSDrArg; // int 351 break; 352 case 'y': 353 if (isFreeBSDKPrintf) 354 k = ConversionSpecifier::FreeBSDyArg; // int 355 break; 356 // Apple-specific. 357 case 'D': 358 if (isFreeBSDKPrintf) 359 k = ConversionSpecifier::FreeBSDDArg; // void * followed by char * 360 else if (Target.getTriple().isOSDarwin()) 361 k = ConversionSpecifier::DArg; 362 break; 363 case 'O': 364 if (Target.getTriple().isOSDarwin()) 365 k = ConversionSpecifier::OArg; 366 break; 367 case 'U': 368 if (Target.getTriple().isOSDarwin()) 369 k = ConversionSpecifier::UArg; 370 break; 371 // MS specific. 372 case 'Z': 373 if (Target.getTriple().isOSMSVCRT()) 374 k = ConversionSpecifier::ZArg; 375 break; 376 } 377 378 // Check to see if we used the Objective-C modifier flags with 379 // a conversion specifier other than '@'. 380 if (k != ConversionSpecifier::ObjCObjArg && 381 k != ConversionSpecifier::InvalidSpecifier && 382 ObjCModifierFlagsStart) { 383 H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart, 384 ObjCModifierFlagsEnd + 1, 385 conversionPosition); 386 return true; 387 } 388 389 PrintfConversionSpecifier CS(conversionPosition, k); 390 FS.setConversionSpecifier(CS); 391 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 392 FS.setArgIndex(argIndex++); 393 // FreeBSD kernel specific. 394 if (k == ConversionSpecifier::FreeBSDbArg || 395 k == ConversionSpecifier::FreeBSDDArg) 396 argIndex++; 397 398 if (k == ConversionSpecifier::InvalidSpecifier) { 399 unsigned Len = I - Start; 400 if (ParseUTF8InvalidSpecifier(Start, E, Len)) { 401 CS.setEndScanList(Start + Len); 402 FS.setConversionSpecifier(CS); 403 } 404 // Assume the conversion takes one argument. 405 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len); 406 } 407 return PrintfSpecifierResult(Start, FS); 408 } 409 410 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 411 const char *I, 412 const char *E, 413 const LangOptions &LO, 414 const TargetInfo &Target, 415 bool isFreeBSDKPrintf) { 416 417 unsigned argIndex = 0; 418 419 // Keep looking for a format specifier until we have exhausted the string. 420 while (I != E) { 421 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 422 LO, Target, true, 423 isFreeBSDKPrintf); 424 // Did a fail-stop error of any kind occur when parsing the specifier? 425 // If so, don't do any more processing. 426 if (FSR.shouldStop()) 427 return true; 428 // Did we exhaust the string or encounter an error that 429 // we can recover from? 430 if (!FSR.hasValue()) 431 continue; 432 // We have a format specifier. Pass it to the callback. 433 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 434 I - FSR.getStart(), Target)) 435 return true; 436 } 437 assert(I == E && "Format string not exhausted"); 438 return false; 439 } 440 441 bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I, 442 const char *E, 443 const LangOptions &LO, 444 const TargetInfo &Target) { 445 446 unsigned argIndex = 0; 447 448 // Keep looking for a %s format specifier until we have exhausted the string. 449 FormatStringHandler H; 450 while (I != E) { 451 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 452 LO, Target, false, 453 false); 454 // Did a fail-stop error of any kind occur when parsing the specifier? 455 // If so, don't do any more processing. 456 if (FSR.shouldStop()) 457 return false; 458 // Did we exhaust the string or encounter an error that 459 // we can recover from? 460 if (!FSR.hasValue()) 461 continue; 462 const analyze_printf::PrintfSpecifier &FS = FSR.getValue(); 463 // Return true if this a %s format specifier. 464 if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg) 465 return true; 466 } 467 return false; 468 } 469 470 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers( 471 const char *Begin, const char *End, const LangOptions &LO, 472 const TargetInfo &Target) { 473 unsigned ArgIndex = 0; 474 // Keep looking for a formatting specifier until we have exhausted the string. 475 FormatStringHandler H; 476 while (Begin != End) { 477 const PrintfSpecifierResult &FSR = 478 ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false); 479 if (FSR.shouldStop()) 480 break; 481 if (FSR.hasValue()) 482 return true; 483 } 484 return false; 485 } 486 487 //===----------------------------------------------------------------------===// 488 // Methods on PrintfSpecifier. 489 //===----------------------------------------------------------------------===// 490 491 ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx, 492 bool IsObjCLiteral) const { 493 if (CS.getKind() == ConversionSpecifier::cArg) 494 switch (LM.getKind()) { 495 case LengthModifier::None: 496 return Ctx.IntTy; 497 case LengthModifier::AsLong: 498 case LengthModifier::AsWide: 499 return ArgType(ArgType::WIntTy, "wint_t"); 500 case LengthModifier::AsShort: 501 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 502 return Ctx.IntTy; 503 [[fallthrough]]; 504 default: 505 return ArgType::Invalid(); 506 } 507 508 if (CS.isIntArg()) 509 switch (LM.getKind()) { 510 case LengthModifier::AsLongDouble: 511 // GNU extension. 512 return Ctx.LongLongTy; 513 case LengthModifier::None: 514 case LengthModifier::AsShortLong: 515 return Ctx.IntTy; 516 case LengthModifier::AsInt32: 517 return ArgType(Ctx.IntTy, "__int32"); 518 case LengthModifier::AsChar: 519 return ArgType::AnyCharTy; 520 case LengthModifier::AsShort: return Ctx.ShortTy; 521 case LengthModifier::AsLong: return Ctx.LongTy; 522 case LengthModifier::AsLongLong: 523 case LengthModifier::AsQuad: 524 return Ctx.LongLongTy; 525 case LengthModifier::AsInt64: 526 return ArgType(Ctx.LongLongTy, "__int64"); 527 case LengthModifier::AsIntMax: 528 return ArgType(Ctx.getIntMaxType(), "intmax_t"); 529 case LengthModifier::AsSizeT: 530 return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 531 case LengthModifier::AsInt3264: 532 return Ctx.getTargetInfo().getTriple().isArch64Bit() 533 ? ArgType(Ctx.LongLongTy, "__int64") 534 : ArgType(Ctx.IntTy, "__int32"); 535 case LengthModifier::AsPtrDiff: 536 return ArgType::makePtrdiffT( 537 ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 538 case LengthModifier::AsAllocate: 539 case LengthModifier::AsMAllocate: 540 case LengthModifier::AsWide: 541 return ArgType::Invalid(); 542 } 543 544 if (CS.isUIntArg()) 545 switch (LM.getKind()) { 546 case LengthModifier::AsLongDouble: 547 // GNU extension. 548 return Ctx.UnsignedLongLongTy; 549 case LengthModifier::None: 550 case LengthModifier::AsShortLong: 551 return Ctx.UnsignedIntTy; 552 case LengthModifier::AsInt32: 553 return ArgType(Ctx.UnsignedIntTy, "unsigned __int32"); 554 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 555 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 556 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 557 case LengthModifier::AsLongLong: 558 case LengthModifier::AsQuad: 559 return Ctx.UnsignedLongLongTy; 560 case LengthModifier::AsInt64: 561 return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"); 562 case LengthModifier::AsIntMax: 563 return ArgType(Ctx.getUIntMaxType(), "uintmax_t"); 564 case LengthModifier::AsSizeT: 565 return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t")); 566 case LengthModifier::AsInt3264: 567 return Ctx.getTargetInfo().getTriple().isArch64Bit() 568 ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64") 569 : ArgType(Ctx.UnsignedIntTy, "unsigned __int32"); 570 case LengthModifier::AsPtrDiff: 571 return ArgType::makePtrdiffT( 572 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); 573 case LengthModifier::AsAllocate: 574 case LengthModifier::AsMAllocate: 575 case LengthModifier::AsWide: 576 return ArgType::Invalid(); 577 } 578 579 if (CS.isDoubleArg()) { 580 if (!VectorNumElts.isInvalid()) { 581 switch (LM.getKind()) { 582 case LengthModifier::AsShort: 583 return Ctx.HalfTy; 584 case LengthModifier::AsShortLong: 585 return Ctx.FloatTy; 586 case LengthModifier::AsLong: 587 default: 588 return Ctx.DoubleTy; 589 } 590 } 591 592 if (LM.getKind() == LengthModifier::AsLongDouble) 593 return Ctx.LongDoubleTy; 594 return Ctx.DoubleTy; 595 } 596 597 if (CS.getKind() == ConversionSpecifier::nArg) { 598 switch (LM.getKind()) { 599 case LengthModifier::None: 600 return ArgType::PtrTo(Ctx.IntTy); 601 case LengthModifier::AsChar: 602 return ArgType::PtrTo(Ctx.SignedCharTy); 603 case LengthModifier::AsShort: 604 return ArgType::PtrTo(Ctx.ShortTy); 605 case LengthModifier::AsLong: 606 return ArgType::PtrTo(Ctx.LongTy); 607 case LengthModifier::AsLongLong: 608 case LengthModifier::AsQuad: 609 return ArgType::PtrTo(Ctx.LongLongTy); 610 case LengthModifier::AsIntMax: 611 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 612 case LengthModifier::AsSizeT: 613 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 614 case LengthModifier::AsPtrDiff: 615 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 616 case LengthModifier::AsLongDouble: 617 return ArgType(); // FIXME: Is this a known extension? 618 case LengthModifier::AsAllocate: 619 case LengthModifier::AsMAllocate: 620 case LengthModifier::AsInt32: 621 case LengthModifier::AsInt3264: 622 case LengthModifier::AsInt64: 623 case LengthModifier::AsWide: 624 return ArgType::Invalid(); 625 case LengthModifier::AsShortLong: 626 llvm_unreachable("only used for OpenCL which doesn not handle nArg"); 627 } 628 } 629 630 switch (CS.getKind()) { 631 case ConversionSpecifier::sArg: 632 if (LM.getKind() == LengthModifier::AsWideChar) { 633 if (IsObjCLiteral) 634 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()), 635 "const unichar *"); 636 return ArgType(ArgType::WCStrTy, "wchar_t *"); 637 } 638 if (LM.getKind() == LengthModifier::AsWide) 639 return ArgType(ArgType::WCStrTy, "wchar_t *"); 640 return ArgType::CStrTy; 641 case ConversionSpecifier::SArg: 642 if (IsObjCLiteral) 643 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()), 644 "const unichar *"); 645 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() && 646 LM.getKind() == LengthModifier::AsShort) 647 return ArgType::CStrTy; 648 return ArgType(ArgType::WCStrTy, "wchar_t *"); 649 case ConversionSpecifier::CArg: 650 if (IsObjCLiteral) 651 return ArgType(Ctx.UnsignedShortTy, "unichar"); 652 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() && 653 LM.getKind() == LengthModifier::AsShort) 654 return Ctx.IntTy; 655 return ArgType(Ctx.WideCharTy, "wchar_t"); 656 case ConversionSpecifier::pArg: 657 case ConversionSpecifier::PArg: 658 return ArgType::CPointerTy; 659 case ConversionSpecifier::ObjCObjArg: 660 return ArgType::ObjCPointerTy; 661 default: 662 break; 663 } 664 665 // FIXME: Handle other cases. 666 return ArgType(); 667 } 668 669 670 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx, 671 bool IsObjCLiteral) const { 672 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 673 674 if (!CS.consumesDataArgument()) 675 return ArgType::Invalid(); 676 677 ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral); 678 if (!ScalarTy.isValid() || VectorNumElts.isInvalid()) 679 return ScalarTy; 680 681 return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount()); 682 } 683 684 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 685 ASTContext &Ctx, bool IsObjCLiteral) { 686 // %n is different from other conversion specifiers; don't try to fix it. 687 if (CS.getKind() == ConversionSpecifier::nArg) 688 return false; 689 690 // Handle Objective-C objects first. Note that while the '%@' specifier will 691 // not warn for structure pointer or void pointer arguments (because that's 692 // how CoreFoundation objects are implemented), we only show a fixit for '%@' 693 // if we know it's an object (block, id, class, or __attribute__((NSObject))). 694 if (QT->isObjCRetainableType()) { 695 if (!IsObjCLiteral) 696 return false; 697 698 CS.setKind(ConversionSpecifier::ObjCObjArg); 699 700 // Disable irrelevant flags 701 HasThousandsGrouping = false; 702 HasPlusPrefix = false; 703 HasSpacePrefix = false; 704 HasAlternativeForm = false; 705 HasLeadingZeroes = false; 706 Precision.setHowSpecified(OptionalAmount::NotSpecified); 707 LM.setKind(LengthModifier::None); 708 709 return true; 710 } 711 712 // Handle strings next (char *, wchar_t *) 713 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 714 CS.setKind(ConversionSpecifier::sArg); 715 716 // Disable irrelevant flags 717 HasAlternativeForm = false; 718 HasLeadingZeroes = false; 719 720 // Set the long length modifier for wide characters 721 if (QT->getPointeeType()->isWideCharType()) 722 LM.setKind(LengthModifier::AsWideChar); 723 else 724 LM.setKind(LengthModifier::None); 725 726 return true; 727 } 728 729 // If it's an enum, get its underlying type. 730 if (const EnumType *ETy = QT->getAs<EnumType>()) 731 QT = ETy->getDecl()->getIntegerType(); 732 733 const BuiltinType *BT = QT->getAs<BuiltinType>(); 734 if (!BT) { 735 const VectorType *VT = QT->getAs<VectorType>(); 736 if (VT) { 737 QT = VT->getElementType(); 738 BT = QT->getAs<BuiltinType>(); 739 VectorNumElts = OptionalAmount(VT->getNumElements()); 740 } 741 } 742 743 // We can only work with builtin types. 744 if (!BT) 745 return false; 746 747 // Set length modifier 748 switch (BT->getKind()) { 749 case BuiltinType::Bool: 750 case BuiltinType::WChar_U: 751 case BuiltinType::WChar_S: 752 case BuiltinType::Char8: // FIXME: Treat like 'char'? 753 case BuiltinType::Char16: 754 case BuiltinType::Char32: 755 case BuiltinType::UInt128: 756 case BuiltinType::Int128: 757 case BuiltinType::Half: 758 case BuiltinType::BFloat16: 759 case BuiltinType::Float16: 760 case BuiltinType::Float128: 761 case BuiltinType::Ibm128: 762 case BuiltinType::ShortAccum: 763 case BuiltinType::Accum: 764 case BuiltinType::LongAccum: 765 case BuiltinType::UShortAccum: 766 case BuiltinType::UAccum: 767 case BuiltinType::ULongAccum: 768 case BuiltinType::ShortFract: 769 case BuiltinType::Fract: 770 case BuiltinType::LongFract: 771 case BuiltinType::UShortFract: 772 case BuiltinType::UFract: 773 case BuiltinType::ULongFract: 774 case BuiltinType::SatShortAccum: 775 case BuiltinType::SatAccum: 776 case BuiltinType::SatLongAccum: 777 case BuiltinType::SatUShortAccum: 778 case BuiltinType::SatUAccum: 779 case BuiltinType::SatULongAccum: 780 case BuiltinType::SatShortFract: 781 case BuiltinType::SatFract: 782 case BuiltinType::SatLongFract: 783 case BuiltinType::SatUShortFract: 784 case BuiltinType::SatUFract: 785 case BuiltinType::SatULongFract: 786 // Various types which are non-trivial to correct. 787 return false; 788 789 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ 790 case BuiltinType::Id: 791 #include "clang/Basic/OpenCLImageTypes.def" 792 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ 793 case BuiltinType::Id: 794 #include "clang/Basic/OpenCLExtensionTypes.def" 795 #define SVE_TYPE(Name, Id, SingletonId) \ 796 case BuiltinType::Id: 797 #include "clang/Basic/AArch64SVEACLETypes.def" 798 #define PPC_VECTOR_TYPE(Name, Id, Size) \ 799 case BuiltinType::Id: 800 #include "clang/Basic/PPCTypes.def" 801 #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: 802 #include "clang/Basic/RISCVVTypes.def" 803 #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: 804 #include "clang/Basic/WebAssemblyReferenceTypes.def" 805 #define SIGNED_TYPE(Id, SingletonId) 806 #define UNSIGNED_TYPE(Id, SingletonId) 807 #define FLOATING_TYPE(Id, SingletonId) 808 #define BUILTIN_TYPE(Id, SingletonId) \ 809 case BuiltinType::Id: 810 #include "clang/AST/BuiltinTypes.def" 811 // Misc other stuff which doesn't make sense here. 812 return false; 813 814 case BuiltinType::UInt: 815 case BuiltinType::Int: 816 case BuiltinType::Float: 817 LM.setKind(VectorNumElts.isInvalid() ? 818 LengthModifier::None : LengthModifier::AsShortLong); 819 break; 820 case BuiltinType::Double: 821 LM.setKind(VectorNumElts.isInvalid() ? 822 LengthModifier::None : LengthModifier::AsLong); 823 break; 824 case BuiltinType::Char_U: 825 case BuiltinType::UChar: 826 case BuiltinType::Char_S: 827 case BuiltinType::SChar: 828 LM.setKind(LengthModifier::AsChar); 829 break; 830 831 case BuiltinType::Short: 832 case BuiltinType::UShort: 833 LM.setKind(LengthModifier::AsShort); 834 break; 835 836 case BuiltinType::Long: 837 case BuiltinType::ULong: 838 LM.setKind(LengthModifier::AsLong); 839 break; 840 841 case BuiltinType::LongLong: 842 case BuiltinType::ULongLong: 843 LM.setKind(LengthModifier::AsLongLong); 844 break; 845 846 case BuiltinType::LongDouble: 847 LM.setKind(LengthModifier::AsLongDouble); 848 break; 849 } 850 851 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 852 if (LangOpt.C99 || LangOpt.CPlusPlus11) 853 namedTypeToLengthModifier(QT, LM); 854 855 // If fixing the length modifier was enough, we might be done. 856 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) { 857 // If we're going to offer a fix anyway, make sure the sign matches. 858 switch (CS.getKind()) { 859 case ConversionSpecifier::uArg: 860 case ConversionSpecifier::UArg: 861 if (QT->isSignedIntegerType()) 862 CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg); 863 break; 864 case ConversionSpecifier::dArg: 865 case ConversionSpecifier::DArg: 866 case ConversionSpecifier::iArg: 867 if (QT->isUnsignedIntegerType() && !HasPlusPrefix) 868 CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg); 869 break; 870 default: 871 // Other specifiers do not have signed/unsigned variants. 872 break; 873 } 874 875 const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral); 876 if (ATR.isValid() && ATR.matchesType(Ctx, QT)) 877 return true; 878 } 879 880 // Set conversion specifier and disable any flags which do not apply to it. 881 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 882 if (!QT->getAs<TypedefType>() && QT->isCharType()) { 883 CS.setKind(ConversionSpecifier::cArg); 884 LM.setKind(LengthModifier::None); 885 Precision.setHowSpecified(OptionalAmount::NotSpecified); 886 HasAlternativeForm = false; 887 HasLeadingZeroes = false; 888 HasPlusPrefix = false; 889 } 890 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 891 else if (QT->isRealFloatingType()) { 892 CS.setKind(ConversionSpecifier::fArg); 893 } else if (QT->isSignedIntegerType()) { 894 CS.setKind(ConversionSpecifier::dArg); 895 HasAlternativeForm = false; 896 } else if (QT->isUnsignedIntegerType()) { 897 CS.setKind(ConversionSpecifier::uArg); 898 HasAlternativeForm = false; 899 HasPlusPrefix = false; 900 } else { 901 llvm_unreachable("Unexpected type"); 902 } 903 904 return true; 905 } 906 907 void PrintfSpecifier::toString(raw_ostream &os) const { 908 // Whilst some features have no defined order, we are using the order 909 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 910 os << "%"; 911 912 // Positional args 913 if (usesPositionalArg()) { 914 os << getPositionalArgIndex() << "$"; 915 } 916 917 // Conversion flags 918 if (IsLeftJustified) os << "-"; 919 if (HasPlusPrefix) os << "+"; 920 if (HasSpacePrefix) os << " "; 921 if (HasAlternativeForm) os << "#"; 922 if (HasLeadingZeroes) os << "0"; 923 924 // Minimum field width 925 FieldWidth.toString(os); 926 // Precision 927 Precision.toString(os); 928 929 // Vector modifier 930 if (!VectorNumElts.isInvalid()) 931 os << 'v' << VectorNumElts.getConstantAmount(); 932 933 // Length modifier 934 os << LM.toString(); 935 // Conversion specifier 936 os << CS.toString(); 937 } 938 939 bool PrintfSpecifier::hasValidPlusPrefix() const { 940 if (!HasPlusPrefix) 941 return true; 942 943 // The plus prefix only makes sense for signed conversions 944 switch (CS.getKind()) { 945 case ConversionSpecifier::dArg: 946 case ConversionSpecifier::DArg: 947 case ConversionSpecifier::iArg: 948 case ConversionSpecifier::fArg: 949 case ConversionSpecifier::FArg: 950 case ConversionSpecifier::eArg: 951 case ConversionSpecifier::EArg: 952 case ConversionSpecifier::gArg: 953 case ConversionSpecifier::GArg: 954 case ConversionSpecifier::aArg: 955 case ConversionSpecifier::AArg: 956 case ConversionSpecifier::FreeBSDrArg: 957 case ConversionSpecifier::FreeBSDyArg: 958 return true; 959 960 default: 961 return false; 962 } 963 } 964 965 bool PrintfSpecifier::hasValidAlternativeForm() const { 966 if (!HasAlternativeForm) 967 return true; 968 969 // Alternate form flag only valid with the bBoxXaAeEfFgG conversions 970 switch (CS.getKind()) { 971 case ConversionSpecifier::bArg: 972 case ConversionSpecifier::BArg: 973 case ConversionSpecifier::oArg: 974 case ConversionSpecifier::OArg: 975 case ConversionSpecifier::xArg: 976 case ConversionSpecifier::XArg: 977 case ConversionSpecifier::aArg: 978 case ConversionSpecifier::AArg: 979 case ConversionSpecifier::eArg: 980 case ConversionSpecifier::EArg: 981 case ConversionSpecifier::fArg: 982 case ConversionSpecifier::FArg: 983 case ConversionSpecifier::gArg: 984 case ConversionSpecifier::GArg: 985 case ConversionSpecifier::FreeBSDrArg: 986 case ConversionSpecifier::FreeBSDyArg: 987 return true; 988 989 default: 990 return false; 991 } 992 } 993 994 bool PrintfSpecifier::hasValidLeadingZeros() const { 995 if (!HasLeadingZeroes) 996 return true; 997 998 // Leading zeroes flag only valid with the bBdiouxXaAeEfFgG conversions 999 switch (CS.getKind()) { 1000 case ConversionSpecifier::bArg: 1001 case ConversionSpecifier::BArg: 1002 case ConversionSpecifier::dArg: 1003 case ConversionSpecifier::DArg: 1004 case ConversionSpecifier::iArg: 1005 case ConversionSpecifier::oArg: 1006 case ConversionSpecifier::OArg: 1007 case ConversionSpecifier::uArg: 1008 case ConversionSpecifier::UArg: 1009 case ConversionSpecifier::xArg: 1010 case ConversionSpecifier::XArg: 1011 case ConversionSpecifier::aArg: 1012 case ConversionSpecifier::AArg: 1013 case ConversionSpecifier::eArg: 1014 case ConversionSpecifier::EArg: 1015 case ConversionSpecifier::fArg: 1016 case ConversionSpecifier::FArg: 1017 case ConversionSpecifier::gArg: 1018 case ConversionSpecifier::GArg: 1019 case ConversionSpecifier::FreeBSDrArg: 1020 case ConversionSpecifier::FreeBSDyArg: 1021 return true; 1022 1023 default: 1024 return false; 1025 } 1026 } 1027 1028 bool PrintfSpecifier::hasValidSpacePrefix() const { 1029 if (!HasSpacePrefix) 1030 return true; 1031 1032 // The space prefix only makes sense for signed conversions 1033 switch (CS.getKind()) { 1034 case ConversionSpecifier::dArg: 1035 case ConversionSpecifier::DArg: 1036 case ConversionSpecifier::iArg: 1037 case ConversionSpecifier::fArg: 1038 case ConversionSpecifier::FArg: 1039 case ConversionSpecifier::eArg: 1040 case ConversionSpecifier::EArg: 1041 case ConversionSpecifier::gArg: 1042 case ConversionSpecifier::GArg: 1043 case ConversionSpecifier::aArg: 1044 case ConversionSpecifier::AArg: 1045 case ConversionSpecifier::FreeBSDrArg: 1046 case ConversionSpecifier::FreeBSDyArg: 1047 return true; 1048 1049 default: 1050 return false; 1051 } 1052 } 1053 1054 bool PrintfSpecifier::hasValidLeftJustified() const { 1055 if (!IsLeftJustified) 1056 return true; 1057 1058 // The left justified flag is valid for all conversions except n 1059 switch (CS.getKind()) { 1060 case ConversionSpecifier::nArg: 1061 return false; 1062 1063 default: 1064 return true; 1065 } 1066 } 1067 1068 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 1069 if (!HasThousandsGrouping) 1070 return true; 1071 1072 switch (CS.getKind()) { 1073 case ConversionSpecifier::dArg: 1074 case ConversionSpecifier::DArg: 1075 case ConversionSpecifier::iArg: 1076 case ConversionSpecifier::uArg: 1077 case ConversionSpecifier::UArg: 1078 case ConversionSpecifier::fArg: 1079 case ConversionSpecifier::FArg: 1080 case ConversionSpecifier::gArg: 1081 case ConversionSpecifier::GArg: 1082 return true; 1083 default: 1084 return false; 1085 } 1086 } 1087 1088 bool PrintfSpecifier::hasValidPrecision() const { 1089 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 1090 return true; 1091 1092 // Precision is only valid with the bBdiouxXaAeEfFgGsP conversions 1093 switch (CS.getKind()) { 1094 case ConversionSpecifier::bArg: 1095 case ConversionSpecifier::BArg: 1096 case ConversionSpecifier::dArg: 1097 case ConversionSpecifier::DArg: 1098 case ConversionSpecifier::iArg: 1099 case ConversionSpecifier::oArg: 1100 case ConversionSpecifier::OArg: 1101 case ConversionSpecifier::uArg: 1102 case ConversionSpecifier::UArg: 1103 case ConversionSpecifier::xArg: 1104 case ConversionSpecifier::XArg: 1105 case ConversionSpecifier::aArg: 1106 case ConversionSpecifier::AArg: 1107 case ConversionSpecifier::eArg: 1108 case ConversionSpecifier::EArg: 1109 case ConversionSpecifier::fArg: 1110 case ConversionSpecifier::FArg: 1111 case ConversionSpecifier::gArg: 1112 case ConversionSpecifier::GArg: 1113 case ConversionSpecifier::sArg: 1114 case ConversionSpecifier::FreeBSDrArg: 1115 case ConversionSpecifier::FreeBSDyArg: 1116 case ConversionSpecifier::PArg: 1117 return true; 1118 1119 default: 1120 return false; 1121 } 1122 } 1123 bool PrintfSpecifier::hasValidFieldWidth() const { 1124 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 1125 return true; 1126 1127 // The field width is valid for all conversions except n 1128 switch (CS.getKind()) { 1129 case ConversionSpecifier::nArg: 1130 return false; 1131 1132 default: 1133 return true; 1134 } 1135 } 1136