1 // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Shared details for processing format strings of printf and scanf 10 // (and friends). 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "FormatStringParsing.h" 15 #include "clang/Basic/LangOptions.h" 16 #include "clang/Basic/TargetInfo.h" 17 #include "llvm/Support/ConvertUTF.h" 18 #include <optional> 19 20 using clang::analyze_format_string::ArgType; 21 using clang::analyze_format_string::FormatStringHandler; 22 using clang::analyze_format_string::FormatSpecifier; 23 using clang::analyze_format_string::LengthModifier; 24 using clang::analyze_format_string::OptionalAmount; 25 using clang::analyze_format_string::ConversionSpecifier; 26 using namespace clang; 27 28 // Key function to FormatStringHandler. 29 FormatStringHandler::~FormatStringHandler() {} 30 31 //===----------------------------------------------------------------------===// 32 // Functions for parsing format strings components in both printf and 33 // scanf format strings. 
34 //===----------------------------------------------------------------------===// 35 36 OptionalAmount 37 clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { 38 const char *I = Beg; 39 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 40 41 unsigned accumulator = 0; 42 bool hasDigits = false; 43 44 for ( ; I != E; ++I) { 45 char c = *I; 46 if (c >= '0' && c <= '9') { 47 hasDigits = true; 48 accumulator = (accumulator * 10) + (c - '0'); 49 continue; 50 } 51 52 if (hasDigits) 53 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, 54 false); 55 56 break; 57 } 58 59 return OptionalAmount(); 60 } 61 62 OptionalAmount 63 clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, 64 const char *E, 65 unsigned &argIndex) { 66 if (*Beg == '*') { 67 ++Beg; 68 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); 69 } 70 71 return ParseAmount(Beg, E); 72 } 73 74 OptionalAmount 75 clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, 76 const char *Start, 77 const char *&Beg, 78 const char *E, 79 PositionContext p) { 80 if (*Beg == '*') { 81 const char *I = Beg + 1; 82 const OptionalAmount &Amt = ParseAmount(I, E); 83 84 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 85 H.HandleInvalidPosition(Beg, I - Beg, p); 86 return OptionalAmount(false); 87 } 88 89 if (I == E) { 90 // No more characters left? 91 H.HandleIncompleteSpecifier(Start, E - Start); 92 return OptionalAmount(false); 93 } 94 95 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 96 97 if (*I == '$') { 98 // Handle positional arguments 99 100 // Special case: '*0$', since this is an easy mistake. 
101 if (Amt.getConstantAmount() == 0) { 102 H.HandleZeroPosition(Beg, I - Beg + 1); 103 return OptionalAmount(false); 104 } 105 106 const char *Tmp = Beg; 107 Beg = ++I; 108 109 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 110 Tmp, 0, true); 111 } 112 113 H.HandleInvalidPosition(Beg, I - Beg, p); 114 return OptionalAmount(false); 115 } 116 117 return ParseAmount(Beg, E); 118 } 119 120 121 bool 122 clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, 123 FormatSpecifier &CS, 124 const char *Start, 125 const char *&Beg, const char *E, 126 unsigned *argIndex) { 127 // FIXME: Support negative field widths. 128 if (argIndex) { 129 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 130 } 131 else { 132 const OptionalAmount Amt = 133 ParsePositionAmount(H, Start, Beg, E, 134 analyze_format_string::FieldWidthPos); 135 136 if (Amt.isInvalid()) 137 return true; 138 CS.setFieldWidth(Amt); 139 } 140 return false; 141 } 142 143 bool 144 clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, 145 FormatSpecifier &FS, 146 const char *Start, 147 const char *&Beg, 148 const char *E) { 149 const char *I = Beg; 150 151 const OptionalAmount &Amt = ParseAmount(I, E); 152 153 if (I == E) { 154 // No more characters left? 155 H.HandleIncompleteSpecifier(Start, E - Start); 156 return true; 157 } 158 159 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 160 // Warn that positional arguments are non-standard. 161 H.HandlePosition(Start, I - Start); 162 163 // Special case: '%0$', since this is an easy mistake. 164 if (Amt.getConstantAmount() == 0) { 165 H.HandleZeroPosition(Start, I - Start); 166 return true; 167 } 168 169 FS.setArgIndex(Amt.getConstantAmount() - 1); 170 FS.setUsesPositionalArg(); 171 // Update the caller's pointer if we decided to consume 172 // these characters. 
173 Beg = I; 174 return false; 175 } 176 177 return false; 178 } 179 180 bool 181 clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, 182 FormatSpecifier &FS, 183 const char *&I, 184 const char *E, 185 const LangOptions &LO) { 186 if (!LO.OpenCL) 187 return false; 188 189 const char *Start = I; 190 if (*I == 'v') { 191 ++I; 192 193 if (I == E) { 194 H.HandleIncompleteSpecifier(Start, E - Start); 195 return true; 196 } 197 198 OptionalAmount NumElts = ParseAmount(I, E); 199 if (NumElts.getHowSpecified() != OptionalAmount::Constant) { 200 H.HandleIncompleteSpecifier(Start, E - Start); 201 return true; 202 } 203 204 FS.setVectorNumElts(NumElts); 205 } 206 207 return false; 208 } 209 210 bool 211 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, 212 const char *&I, 213 const char *E, 214 const LangOptions &LO, 215 bool IsScanf) { 216 LengthModifier::Kind lmKind = LengthModifier::None; 217 const char *lmPosition = I; 218 switch (*I) { 219 default: 220 return false; 221 case 'h': 222 ++I; 223 if (I != E && *I == 'h') { 224 ++I; 225 lmKind = LengthModifier::AsChar; 226 } else if (I != E && *I == 'l' && LO.OpenCL) { 227 ++I; 228 lmKind = LengthModifier::AsShortLong; 229 } else { 230 lmKind = LengthModifier::AsShort; 231 } 232 break; 233 case 'l': 234 ++I; 235 if (I != E && *I == 'l') { 236 ++I; 237 lmKind = LengthModifier::AsLongLong; 238 } else { 239 lmKind = LengthModifier::AsLong; 240 } 241 break; 242 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; 243 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; 244 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; 245 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; 246 case 'q': lmKind = LengthModifier::AsQuad; ++I; break; 247 case 'a': 248 if (IsScanf && !LO.C99 && !LO.CPlusPlus11) { 249 // For scanf in C90, look at the next character to see if this should 250 // be parsed as the GNU extension 'a' length modifier. 
If not, this 251 // will be parsed as a conversion specifier. 252 ++I; 253 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { 254 lmKind = LengthModifier::AsAllocate; 255 break; 256 } 257 --I; 258 } 259 return false; 260 case 'm': 261 if (IsScanf) { 262 lmKind = LengthModifier::AsMAllocate; 263 ++I; 264 break; 265 } 266 return false; 267 // printf: AsInt64, AsInt32, AsInt3264 268 // scanf: AsInt64 269 case 'I': 270 if (I + 1 != E && I + 2 != E) { 271 if (I[1] == '6' && I[2] == '4') { 272 I += 3; 273 lmKind = LengthModifier::AsInt64; 274 break; 275 } 276 if (IsScanf) 277 return false; 278 279 if (I[1] == '3' && I[2] == '2') { 280 I += 3; 281 lmKind = LengthModifier::AsInt32; 282 break; 283 } 284 } 285 ++I; 286 lmKind = LengthModifier::AsInt3264; 287 break; 288 case 'w': 289 lmKind = LengthModifier::AsWide; ++I; break; 290 } 291 LengthModifier lm(lmPosition, lmKind); 292 FS.setLengthModifier(lm); 293 return true; 294 } 295 296 bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( 297 const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) { 298 if (SpecifierBegin + 1 >= FmtStrEnd) 299 return false; 300 301 const llvm::UTF8 *SB = 302 reinterpret_cast<const llvm::UTF8 *>(SpecifierBegin + 1); 303 const llvm::UTF8 *SE = reinterpret_cast<const llvm::UTF8 *>(FmtStrEnd); 304 const char FirstByte = *SB; 305 306 // If the invalid specifier is a multibyte UTF-8 string, return the 307 // total length accordingly so that the conversion specifier can be 308 // properly updated to reflect a complete UTF-8 specifier. 309 unsigned NumBytes = llvm::getNumBytesForUTF8(FirstByte); 310 if (NumBytes == 1) 311 return false; 312 if (SB + NumBytes > SE) 313 return false; 314 315 Len = NumBytes + 1; 316 return true; 317 } 318 319 //===----------------------------------------------------------------------===// 320 // Methods on ArgType. 
//===----------------------------------------------------------------------===//

/// Classify how well the argument type \p argTy satisfies this ArgType.
///
/// \p argTy is first decayed if it can decay to a pointer. When this ArgType
/// has Ptr set, \p argTy must be a pointer to a non-const pointee, and the
/// comparison then proceeds on the pointee type. The remaining logic is
/// selected by the kind K and returns one of the MatchKind values.
clang::analyze_format_string::ArgType::MatchKind
ArgType::matchesType(ASTContext &C, QualType argTy) const {
  // When using the format attribute in C++, you can receive a function or an
  // array that will necessarily decay to a pointer when passed to the final
  // format consumer. Apply decay before type comparison.
  if (argTy->canDecayToPointerType())
    argTy = C.getDecayedType(argTy);

  if (Ptr) {
    // It has to be a pointer.
    const PointerType *PT = argTy->getAs<PointerType>();
    if (!PT)
      return NoMatch;

    // We cannot write through a const qualified pointer.
    if (PT->getPointeeType().isConstQualified())
      return NoMatch;

    argTy = PT->getPointeeType();
  }

  switch (K) {
    case InvalidTy:
      llvm_unreachable("ArgType must be valid");

    case UnknownTy:
      return Match;

    case AnyCharTy: {
      if (const auto *ETy = argTy->getAs<EnumType>()) {
        // If the enum is incomplete we know nothing about the underlying type,
        // so it is rejected outright here. Do not use the underlying type for
        // a scoped enumeration.
        if (!ETy->getDecl()->isComplete())
          return NoMatch;
        if (ETy->isUnscopedEnumerationType())
          argTy = ETy->getDecl()->getIntegerType();
      }

      if (const auto *BT = argTy->getAs<BuiltinType>()) {
        // The types are perfectly matched?
        switch (BT->getKind()) {
        default:
          break;
        case BuiltinType::Char_S:
        case BuiltinType::SChar:
        case BuiltinType::UChar:
        case BuiltinType::Char_U:
          return Match;
        case BuiltinType::Bool:
          // bool only counts as a character when passed by value.
          if (!Ptr)
            return Match;
          break;
        }
        // "Partially matched" because of promotions?
        if (!Ptr) {
          switch (BT->getKind()) {
          default:
            break;
          case BuiltinType::Int:
          case BuiltinType::UInt:
            return MatchPromotion;
          case BuiltinType::Short:
          case BuiltinType::UShort:
          case BuiltinType::WChar_S:
          case BuiltinType::WChar_U:
            return NoMatchPromotionTypeConfusion;
          }
        }
      }
      return NoMatch;
    }

    case SpecificTy: {
      if (const EnumType *ETy = argTy->getAs<EnumType>()) {
        // If the enum is incomplete we know nothing about the underlying type.
        // Assume that it's 'int'. Do not use the underlying type for a scoped
        // enumeration as that needs an exact match.
        if (!ETy->getDecl()->isComplete())
          argTy = C.IntTy;
        else if (ETy->isUnscopedEnumerationType())
          argTy = ETy->getDecl()->getIntegerType();
      }

      // Saturated fixed-point types are compared via their unsaturated
      // counterpart.
      if (argTy->isSaturatedFixedPointType())
        argTy = C.getCorrespondingUnsaturatedType(argTy);

      argTy = C.getCanonicalType(argTy).getUnqualifiedType();

      if (T == argTy)
        return Match;
      if (const auto *BT = argTy->getAs<BuiltinType>()) {
        // Check if the only difference between them is signed vs unsigned
        // if true, return match signedness.
        switch (BT->getKind()) {
          default:
            break;
          case BuiltinType::Bool:
            if (Ptr && (T == C.UnsignedCharTy || T == C.SignedCharTy))
              return NoMatch;
            // Otherwise bool is handled like a signed character type below.
            [[fallthrough]];
          case BuiltinType::Char_S:
          case BuiltinType::SChar:
            if (T == C.UnsignedShortTy || T == C.ShortTy)
              return NoMatchTypeConfusion;
            if (T == C.UnsignedCharTy)
              return NoMatchSignedness;
            if (T == C.SignedCharTy)
              return Match;
            break;
          case BuiltinType::Char_U:
          case BuiltinType::UChar:
            if (T == C.UnsignedShortTy || T == C.ShortTy)
              return NoMatchTypeConfusion;
            if (T == C.UnsignedCharTy)
              return Match;
            if (T == C.SignedCharTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::Short:
            if (T == C.UnsignedShortTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::UShort:
            if (T == C.ShortTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::Int:
            if (T == C.UnsignedIntTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::UInt:
            if (T == C.IntTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::Long:
            if (T == C.UnsignedLongTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::ULong:
            if (T == C.LongTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::LongLong:
            if (T == C.UnsignedLongLongTy)
              return NoMatchSignedness;
            break;
          case BuiltinType::ULongLong:
            if (T == C.LongLongTy)
              return NoMatchSignedness;
            break;
          }
        // "Partially matched" because of promotions?
        if (!Ptr) {
          switch (BT->getKind()) {
          default:
            break;
          case BuiltinType::Bool:
            if (T == C.IntTy || T == C.UnsignedIntTy)
              return MatchPromotion;
            break;
          case BuiltinType::Int:
          case BuiltinType::UInt:
            if (T == C.SignedCharTy || T == C.UnsignedCharTy ||
                T == C.ShortTy || T == C.UnsignedShortTy || T == C.WCharTy ||
                T == C.WideCharTy)
              return MatchPromotion;
            break;
          case BuiltinType::Char_U:
            if (T == C.UnsignedIntTy)
              return MatchPromotion;
            if (T == C.UnsignedShortTy)
              return NoMatchPromotionTypeConfusion;
            break;
          case BuiltinType::Char_S:
            if (T == C.IntTy)
              return MatchPromotion;
            if (T == C.ShortTy)
              return NoMatchPromotionTypeConfusion;
            break;
          case BuiltinType::Half:
          case BuiltinType::Float:
            if (T == C.DoubleTy)
              return MatchPromotion;
            break;
          case BuiltinType::Short:
          case BuiltinType::UShort:
            if (T == C.SignedCharTy || T == C.UnsignedCharTy)
              return NoMatchPromotionTypeConfusion;
            break;
          case BuiltinType::WChar_U:
          case BuiltinType::WChar_S:
            // Last case of the switch: when the condition is false, control
            // simply leaves the switch and reaches the NoMatch below.
            if (T != C.WCharTy && T != C.WideCharTy)
              return NoMatchPromotionTypeConfusion;
          }
        }
      }
      return NoMatch;
    }

    case CStrTy:
      // Any pointer whose pointee is a character type is accepted.
      if (const auto *PT = argTy->getAs<PointerType>();
          PT && PT->getPointeeType()->isCharType())
        return Match;
      return NoMatch;

    case WCStrTy:
      // The pointee must be the wide character type, ignoring qualifiers.
      if (const auto *PT = argTy->getAs<PointerType>();
          PT &&
          C.hasSameUnqualifiedType(PT->getPointeeType(), C.getWideCharType()))
        return Match;
      return NoMatch;

    case WIntTy: {
      QualType WInt = C.getCanonicalType(C.getWIntType()).getUnqualifiedType();

      if (C.getCanonicalType(argTy).getUnqualifiedType() == WInt)
        return Match;

      // Otherwise compare against the argument's integer-promoted type.
      QualType PromoArg = C.isPromotableIntegerType(argTy)
                              ? C.getPromotedIntegerType(argTy)
                              : argTy;
      PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();

      // If the promoted argument is the corresponding signed type of the
      // wint_t type, then it should match.
      if (PromoArg->hasSignedIntegerRepresentation() &&
          C.getCorrespondingUnsignedType(PromoArg) == WInt)
        return Match;

      return WInt == PromoArg ? Match : NoMatch;
    }

    case CPointerTy:
      if (const auto *PT = argTy->getAs<PointerType>()) {
        QualType PointeeTy = PT->getPointeeType();
        // void pointers always match; character pointers match only when
        // this ArgType is not itself a pointer-to-pointer (Ptr unset).
        if (PointeeTy->isVoidType() || (!Ptr && PointeeTy->isCharType()))
          return Match;
        return NoMatchPedantic;
      }

      // nullptr_t* is not a double pointer, so reject when something like
      // void** is expected.
      // In C++, nullptr is promoted to void*. In C23, va_arg(ap, void*) is not
      // undefined when the next argument is of type nullptr_t.
      if (!Ptr && argTy->isNullPtrType())
        return C.getLangOpts().CPlusPlus ? MatchPromotion : Match;

      if (argTy->isObjCObjectPointerType() || argTy->isBlockPointerType())
        return NoMatchPedantic;

      return NoMatch;

    case ObjCPointerTy: {
      if (argTy->getAs<ObjCObjectPointerType>() ||
          argTy->getAs<BlockPointerType>())
        return Match;

      // Handle implicit toll-free bridging.
      if (const PointerType *PT = argTy->getAs<PointerType>()) {
        // Things such as CFTypeRef are really just opaque pointers
        // to C structs representing CF types that can often be bridged
        // to Objective-C objects.  Since the compiler doesn't know which
        // structs can be toll-free bridged, we just accept them all.
        QualType pointee = PT->getPointeeType();
        if (pointee->getAsStructureType() || pointee->isVoidType())
          return Match;
      }
      return NoMatch;
    }
  }

  llvm_unreachable("Invalid ArgType Kind!");
}

/// Compare two integer types \p A and \p B by size and, optionally, sign.
///
/// Sizes are compared after raising each to at least the size of 'int'; if
/// those differ the result is NoMatch. Next, when \p CheckSign is set and
/// the signedness differs, the result is NoMatchSignedness. Finally, types
/// whose raw sizes differ yield MatchPromotion, and identical sizes Match.
static analyze_format_string::ArgType::MatchKind
integerTypeMatch(ASTContext &C, QualType A, QualType B, bool CheckSign) {
  using MK = analyze_format_string::ArgType::MatchKind;

  uint64_t IntSize = C.getTypeSize(C.IntTy);
  uint64_t ASize = C.getTypeSize(A);
  uint64_t BSize = C.getTypeSize(B);
  // Anything narrower than int is treated as int-sized for this comparison.
  if (std::max(ASize, IntSize) != std::max(BSize, IntSize))
    return MK::NoMatch;
  if (CheckSign && A->isSignedIntegerType() != B->isSignedIntegerType())
    return MK::NoMatchSignedness;
  if (ASize != BSize)
    return MK::MatchPromotion;
  return MK::Match;
}

/// Classify how well this ArgType matches another ArgType, \p Other.
///
/// An invalid kind on either side gives NoMatch and an unknown kind on
/// either side gives Match. Otherwise one level of pointer is stripped from
/// each side (via the Ptr flag, or a SpecificTy that is a pointer type); the
/// two sides must agree on whether they were pointers, and the remaining
/// kinds/types are then compared.
analyze_format_string::ArgType::MatchKind
ArgType::matchesArgType(ASTContext &C, const ArgType &Other) const {
  using AK = analyze_format_string::ArgType::Kind;

  // Per matchesType.
  if (K == AK::InvalidTy || Other.K == AK::InvalidTy)
    return NoMatch;
  if (K == AK::UnknownTy || Other.K == AK::UnknownTy)
    return Match;

  // Handle whether either (or both, or neither) sides has Ptr set,
  // in addition to whether either (or both, or neither) sides is a SpecificTy
  // that is a pointer.
  ArgType Left = *this;
  bool LeftWasPointer = false;
  ArgType Right = Other;
  bool RightWasPointer = false;
  if (Left.Ptr) {
    Left.Ptr = false;
    LeftWasPointer = true;
  } else if (Left.K == AK::SpecificTy && Left.T->isPointerType()) {
    Left.T = Left.T->getPointeeType();
    LeftWasPointer = true;
  }
  if (Right.Ptr) {
    Right.Ptr = false;
    RightWasPointer = true;
  } else if (Right.K == AK::SpecificTy && Right.T->isPointerType()) {
    Right.T = Right.T->getPointeeType();
    RightWasPointer = true;
  }

  if (LeftWasPointer != RightWasPointer)
    return NoMatch;

  // Ensure that if at least one side is a SpecificTy, then Left is a
  // SpecificTy.
  if (Right.K == AK::SpecificTy)
    std::swap(Left, Right);

  if (Left.K == AK::SpecificTy) {
    if (Right.K == AK::SpecificTy) {
      auto Canon1 = C.getCanonicalType(Left.T);
      auto Canon2 = C.getCanonicalType(Right.T);
      if (Canon1 == Canon2)
        return Match;

      // Beyond identical canonical types, only builtin types can match.
      auto *BT1 = QualType(Canon1)->getAs<BuiltinType>();
      auto *BT2 = QualType(Canon2)->getAs<BuiltinType>();
      if (BT1 == nullptr || BT2 == nullptr)
        return NoMatch;
      if (BT1 == BT2)
        return Match;

      // Size/sign based integer matching is only done for non-pointers.
      if (!LeftWasPointer && BT1->isInteger() && BT2->isInteger())
        return integerTypeMatch(C, Canon1, Canon2, true);
      return NoMatch;
    } else if (Right.K == AK::AnyCharTy) {
      // Compared against 'char' without a signedness check.
      if (!LeftWasPointer && Left.T->isIntegerType())
        return integerTypeMatch(C, Left.T, C.CharTy, false);
      return NoMatch;
    } else if (Right.K == AK::WIntTy) {
      if (!LeftWasPointer && Left.T->isIntegerType())
        return integerTypeMatch(C, Left.T, C.WIntTy, true);
      return NoMatch;
    }
    // It's hypothetically possible to create an AK::SpecificTy ArgType
    // that matches another kind of ArgType, but in practice Clang doesn't
    // do that, so ignore that case.
    return NoMatch;
  }

  // Neither side is a SpecificTy: the kinds must simply be the same.
  return Left.K == Right.K ? Match : NoMatch;
}

/// Build an ArgType for an ext-vector of \p NumElts elements of this
/// ArgType's type T, carrying over Name. Returns ArgType::Invalid() when T
/// is null.
ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const {
  // Check for valid vector element types.
  if (T.isNull())
    return ArgType::Invalid();

  QualType Vec = C.getExtVectorType(T, NumElts);
  return ArgType(Vec, Name);
}

/// Return a concrete QualType standing in for this ArgType's kind, wrapped
/// in one additional pointer level when Ptr is set. Must not be called for
/// the Invalid or Unknown kinds.
QualType ArgType::getRepresentativeType(ASTContext &C) const {
  QualType Res;
  switch (K) {
    case InvalidTy:
      llvm_unreachable("No representative type for Invalid ArgType");
    case UnknownTy:
      llvm_unreachable("No representative type for Unknown ArgType");
    case AnyCharTy:
      Res = C.CharTy;
      break;
    case SpecificTy:
      Res = T;
      break;
    case CStrTy:
      Res = C.getPointerType(C.CharTy);
      break;
    case WCStrTy:
      Res = C.getPointerType(C.getWideCharType());
      break;
    case ObjCPointerTy:
      Res = C.ObjCBuiltinIdTy;
      break;
    case CPointerTy:
      Res = C.VoidPtrTy;
      break;
    case WIntTy: {
      Res = C.getWIntType();
      break;
    }
  }

  if (Ptr)
    Res = C.getPointerType(Res);
  return Res;
}

/// Return the representative type as a quoted string. When Name is set and
/// differs from the printed type, the result has the form
/// "'Name' (aka 'type')"; otherwise it is just "'type'".
std::string ArgType::getRepresentativeTypeName(ASTContext &C) const {
  std::string S = getRepresentativeType(C).getAsString(C.getPrintingPolicy());

  std::string Alias;
  if (Name) {
    // Use a specific name for this type, e.g. "size_t".
    Alias = Name;
    if (Ptr) {
      // If ArgType is actually a pointer to T, append an asterisk.
      // NOTE(review): indexing the last character assumes Name is a
      // non-empty string -- confirm against callers.
      Alias += (Alias[Alias.size()-1] == '*') ? "*" : " *";
    }
    // If Alias is the same as the underlying type, e.g. wchar_t, then drop it.
    if (S == Alias)
      Alias.clear();
  }

  if (!Alias.empty())
    return std::string("'") + Alias + "' (aka '" + S + "')";
  return std::string("'") + S + "'";
}


//===----------------------------------------------------------------------===//
// Methods on OptionalAmount.
758 //===----------------------------------------------------------------------===// 759 760 ArgType 761 analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { 762 return Ctx.IntTy; 763 } 764 765 //===----------------------------------------------------------------------===// 766 // Methods on LengthModifier. 767 //===----------------------------------------------------------------------===// 768 769 const char * 770 analyze_format_string::LengthModifier::toString() const { 771 switch (kind) { 772 case AsChar: 773 return "hh"; 774 case AsShort: 775 return "h"; 776 case AsShortLong: 777 return "hl"; 778 case AsLong: // or AsWideChar 779 return "l"; 780 case AsLongLong: 781 return "ll"; 782 case AsQuad: 783 return "q"; 784 case AsIntMax: 785 return "j"; 786 case AsSizeT: 787 return "z"; 788 case AsPtrDiff: 789 return "t"; 790 case AsInt32: 791 return "I32"; 792 case AsInt3264: 793 return "I"; 794 case AsInt64: 795 return "I64"; 796 case AsLongDouble: 797 return "L"; 798 case AsAllocate: 799 return "a"; 800 case AsMAllocate: 801 return "m"; 802 case AsWide: 803 return "w"; 804 case None: 805 return ""; 806 } 807 return nullptr; 808 } 809 810 //===----------------------------------------------------------------------===// 811 // Methods on ConversionSpecifier. 
//===----------------------------------------------------------------------===//

/// Return the spelling of this conversion specifier as it appears in a
/// format string. Returns nullptr for InvalidSpecifier and for any kind not
/// handled by the switch. Note that some distinct kinds share a spelling
/// (e.g. bArg/FreeBSDbArg, DArg/FreeBSDDArg, rArg/FreeBSDrArg).
const char *ConversionSpecifier::toString() const {
  switch (kind) {
  case bArg: return "b";
  case BArg: return "B";
  case dArg: return "d";
  case DArg: return "D";
  case iArg: return "i";
  case oArg: return "o";
  case OArg: return "O";
  case uArg: return "u";
  case UArg: return "U";
  case xArg: return "x";
  case XArg: return "X";
  case fArg: return "f";
  case FArg: return "F";
  case eArg: return "e";
  case EArg: return "E";
  case gArg: return "g";
  case GArg: return "G";
  case aArg: return "a";
  case AArg: return "A";
  case cArg: return "c";
  case sArg: return "s";
  case pArg: return "p";
  case PArg:
    return "P";
  case nArg: return "n";
  case PercentArg:  return "%";
  case ScanListArg: return "[";
  case InvalidSpecifier: return nullptr;

  // POSIX unicode extensions.
  case CArg: return "C";
  case SArg: return "S";

  // Objective-C specific specifiers.
  case ObjCObjArg: return "@";

  // FreeBSD kernel specific specifiers.
  case FreeBSDbArg: return "b";
  case FreeBSDDArg: return "D";
  case FreeBSDrArg: return "r";
  case FreeBSDyArg: return "y";

  // GlibC specific specifiers.
  case PrintErrno: return "m";

  // MS specific specifiers.
  case ZArg: return "Z";

  // ISO/IEC TR 18037 (fixed-point) specific specifiers.
  case rArg:
    return "r";
  case RArg:
    return "R";
  case kArg:
    return "k";
  case KArg:
    return "K";
  }
  return nullptr;
}

/// For the upper-case specifiers DArg, UArg and OArg, return a copy of this
/// specifier with the kind replaced by the lower-case dArg, uArg or oArg
/// respectively. Returns std::nullopt for every other kind.
std::optional<ConversionSpecifier>
ConversionSpecifier::getStandardSpecifier() const {
  ConversionSpecifier::Kind NewKind;

  switch (getKind()) {
  default:
    return std::nullopt;
  case DArg:
    NewKind = dArg;
    break;
  case UArg:
    NewKind = uArg;
    break;
  case OArg:
    NewKind = oArg;
    break;
  }

  // Copy everything else from this specifier; only the kind changes.
  ConversionSpecifier FixedCS(*this);
  FixedCS.setKind(NewKind);
  return FixedCS;
}

//===----------------------------------------------------------------------===//
// Methods on OptionalAmount.
//===----------------------------------------------------------------------===//

/// Write this amount to \p os in format-string syntax: nothing for Invalid
/// or NotSpecified, "*" or "*N$" for an argument-supplied amount, and the
/// number itself for a constant. A leading "." is emitted first when
/// UsesDotPrefix is set.
void OptionalAmount::toString(raw_ostream &os) const {
  switch (hs) {
  case Invalid:
  case NotSpecified:
    return;
  case Arg:
    if (UsesDotPrefix)
      os << ".";
    if (usesPositionalArg())
      os << "*" << getPositionalArgIndex() << "$";
    else
      os << "*";
    break;
  case Constant:
    if (UsesDotPrefix)
      os << ".";
    os << amt;
    break;
  }
}

/// Return whether the length modifier LM may be combined with the
/// conversion specifier CS, taking the target triple and language options
/// into account.
bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target,
                                             const LangOptions &LO) const {
  switch (LM.getKind()) {
    case LengthModifier::None:
      return true;

    // Handle most integer flags
    case LengthModifier::AsShort:
      // Length modifier only applies to FP vectors.
      if (LO.OpenCL && CS.isDoubleArg())
        return !VectorNumElts.isInvalid();

      if (CS.isFixedPointArg())
        return true;

      // On MSVCRT targets 'h' is additionally accepted with the character
      // and string conversions listed here.
      if (Target.getTriple().isOSMSVCRT()) {
        switch (CS.getKind()) {
          case ConversionSpecifier::cArg:
          case ConversionSpecifier::CArg:
          case ConversionSpecifier::sArg:
          case ConversionSpecifier::SArg:
          case ConversionSpecifier::ZArg:
            return true;
          default:
            break;
        }
      }
      // Anything not accepted above is checked with the shared integer rules.
      [[fallthrough]];
    case LengthModifier::AsChar:
    case LengthModifier::AsLongLong:
    case LengthModifier::AsQuad:
    case LengthModifier::AsIntMax:
    case LengthModifier::AsSizeT:
    case LengthModifier::AsPtrDiff:
      switch (CS.getKind()) {
        case ConversionSpecifier::bArg:
        case ConversionSpecifier::BArg:
        case ConversionSpecifier::dArg:
        case ConversionSpecifier::DArg:
        case ConversionSpecifier::iArg:
        case ConversionSpecifier::oArg:
        case ConversionSpecifier::OArg:
        case ConversionSpecifier::uArg:
        case ConversionSpecifier::UArg:
        case ConversionSpecifier::xArg:
        case ConversionSpecifier::XArg:
        case ConversionSpecifier::nArg:
          return true;
        case ConversionSpecifier::FreeBSDrArg:
        case ConversionSpecifier::FreeBSDyArg:
          return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS();
        default:
          return false;
      }

    case LengthModifier::AsShortLong:
      return LO.OpenCL && !VectorNumElts.isInvalid();

    // Handle 'l' flag
    case LengthModifier::AsLong: // or AsWideChar
      if (CS.isDoubleArg()) {
        // Invalid for OpenCL FP scalars.
        if (LO.OpenCL && VectorNumElts.isInvalid())
          return false;
        return true;
      }

      if (CS.isFixedPointArg())
        return true;

      switch (CS.getKind()) {
        case ConversionSpecifier::bArg:
        case ConversionSpecifier::BArg:
        case ConversionSpecifier::dArg:
        case ConversionSpecifier::DArg:
        case ConversionSpecifier::iArg:
        case ConversionSpecifier::oArg:
        case ConversionSpecifier::OArg:
        case ConversionSpecifier::uArg:
        case ConversionSpecifier::UArg:
        case ConversionSpecifier::xArg:
        case ConversionSpecifier::XArg:
        case ConversionSpecifier::nArg:
        case ConversionSpecifier::cArg:
        case ConversionSpecifier::sArg:
        case ConversionSpecifier::ScanListArg:
        case ConversionSpecifier::ZArg:
          return true;
        case ConversionSpecifier::FreeBSDrArg:
        case ConversionSpecifier::FreeBSDyArg:
          return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS();
        default:
          return false;
      }

    case LengthModifier::AsLongDouble:
      switch (CS.getKind()) {
        case ConversionSpecifier::aArg:
        case ConversionSpecifier::AArg:
        case ConversionSpecifier::fArg:
        case ConversionSpecifier::FArg:
        case ConversionSpecifier::eArg:
        case ConversionSpecifier::EArg:
        case ConversionSpecifier::gArg:
        case ConversionSpecifier::GArg:
          return true;
        // GNU libc extension.
        case ConversionSpecifier::dArg:
        case ConversionSpecifier::iArg:
        case ConversionSpecifier::oArg:
        case ConversionSpecifier::uArg:
        case ConversionSpecifier::xArg:
        case ConversionSpecifier::XArg:
          return !Target.getTriple().isOSDarwin() &&
                 !Target.getTriple().isOSWindows();
        default:
          return false;
      }

    case LengthModifier::AsAllocate:
      switch (CS.getKind()) {
        case ConversionSpecifier::sArg:
        case ConversionSpecifier::SArg:
        case ConversionSpecifier::ScanListArg:
          return true;
        default:
          return false;
      }

    case LengthModifier::AsMAllocate:
      switch (CS.getKind()) {
        case ConversionSpecifier::cArg:
        case ConversionSpecifier::CArg:
        case ConversionSpecifier::sArg:
        case ConversionSpecifier::SArg:
        case ConversionSpecifier::ScanListArg:
          return true;
        default:
          return false;
      }
    // The 'I', 'I32' and 'I64' modifiers are only valid on MSVCRT targets.
    case LengthModifier::AsInt32:
    case LengthModifier::AsInt3264:
    case LengthModifier::AsInt64:
      switch (CS.getKind()) {
        case ConversionSpecifier::dArg:
        case ConversionSpecifier::iArg:
        case ConversionSpecifier::oArg:
        case ConversionSpecifier::uArg:
        case ConversionSpecifier::xArg:
        case ConversionSpecifier::XArg:
          return Target.getTriple().isOSMSVCRT();
        default:
          return false;
      }
    // The 'w' modifier is likewise only valid on MSVCRT targets.
    case LengthModifier::AsWide:
      switch (CS.getKind()) {
        case ConversionSpecifier::cArg:
        case ConversionSpecifier::CArg:
        case ConversionSpecifier::sArg:
        case ConversionSpecifier::SArg:
        case ConversionSpecifier::ZArg:
          return Target.getTriple().isOSMSVCRT();
        default:
          return false;
      }
  }
  llvm_unreachable("Invalid LengthModifier Kind!");
}

/// Return whether the length modifier LM is one of those this function
/// classifies as standard (as opposed to an extension).
bool FormatSpecifier::hasStandardLengthModifier() const {
  switch (LM.getKind()) {
    case LengthModifier::None:
    case LengthModifier::AsChar:
    case LengthModifier::AsShort:
    case LengthModifier::AsLong:
    case LengthModifier::AsLongLong:
    case LengthModifier::AsIntMax:
    case LengthModifier::AsSizeT:
    case LengthModifier::AsPtrDiff:
    case LengthModifier::AsLongDouble:
      return true;
    case LengthModifier::AsAllocate:
    case LengthModifier::AsMAllocate:
    case LengthModifier::AsQuad:
    case LengthModifier::AsInt32:
    case LengthModifier::AsInt3264:
    case LengthModifier::AsInt64:
    case LengthModifier::AsWide:
    case LengthModifier::AsShortLong: // ???
      return false;
  }
  llvm_unreachable("Invalid LengthModifier Kind!");
}

/// Return whether the conversion specifier CS is classified as standard.
/// CArg/SArg depend on \p LangOpt.ObjC and the fixed-point specifiers
/// (rArg, RArg, kArg, KArg) depend on \p LangOpt.FixedPoint.
bool FormatSpecifier::hasStandardConversionSpecifier(
    const LangOptions &LangOpt) const {
  switch (CS.getKind()) {
    case ConversionSpecifier::bArg:
    case ConversionSpecifier::BArg:
    case ConversionSpecifier::cArg:
    case ConversionSpecifier::dArg:
    case ConversionSpecifier::iArg:
    case ConversionSpecifier::oArg:
    case ConversionSpecifier::uArg:
    case ConversionSpecifier::xArg:
    case ConversionSpecifier::XArg:
    case ConversionSpecifier::fArg:
    case ConversionSpecifier::FArg:
    case ConversionSpecifier::eArg:
    case ConversionSpecifier::EArg:
    case ConversionSpecifier::gArg:
    case ConversionSpecifier::GArg:
    case ConversionSpecifier::aArg:
    case ConversionSpecifier::AArg:
    case ConversionSpecifier::sArg:
    case ConversionSpecifier::pArg:
    case ConversionSpecifier::nArg:
    case ConversionSpecifier::ObjCObjArg:
    case ConversionSpecifier::ScanListArg:
    case ConversionSpecifier::PercentArg:
    case ConversionSpecifier::PArg:
      return true;
    case ConversionSpecifier::CArg:
    case ConversionSpecifier::SArg:
      return LangOpt.ObjC;
    case ConversionSpecifier::InvalidSpecifier:
    case ConversionSpecifier::FreeBSDbArg:
    case ConversionSpecifier::FreeBSDDArg:
    case ConversionSpecifier::FreeBSDrArg:
    case ConversionSpecifier::FreeBSDyArg:
    case ConversionSpecifier::PrintErrno:
    case ConversionSpecifier::DArg:
    case ConversionSpecifier::OArg:
    case ConversionSpecifier::UArg:
    case ConversionSpecifier::ZArg:
      return false;
    case ConversionSpecifier::rArg:
    case ConversionSpecifier::RArg:
    case ConversionSpecifier::kArg:
    case ConversionSpecifier::KArg:
      return LangOpt.FixedPoint;
  }
  llvm_unreachable("Invalid ConversionSpecifier Kind!");
}

/// Return whether the pairing of LM and CS is classified as standard. The
/// only pairing rejected here is the 'L' (AsLongDouble) modifier together
/// with one of the integer conversions d, i, o, u, x or X.
bool FormatSpecifier::hasStandardLengthConversionCombination() const {
  if (LM.getKind() == LengthModifier::AsLongDouble) {
    switch(CS.getKind()) {
        case ConversionSpecifier::dArg:
        case ConversionSpecifier::iArg:
        case ConversionSpecifier::oArg:
        case ConversionSpecifier::uArg:
        case ConversionSpecifier::xArg:
        case ConversionSpecifier::XArg:
          return false;
        default:
          return true;
    }
  }
  return true;
}

/// Suggest a replacement length modifier: for an integer conversion (or
/// 'n') written with 'L' or 'q', return a copy of LM with its kind changed
/// to AsLongLong. Returns std::nullopt when no correction applies.
std::optional<LengthModifier>
FormatSpecifier::getCorrectedLengthModifier() const {
  if (CS.isAnyIntArg() || CS.getKind() == ConversionSpecifier::nArg) {
    if (LM.getKind() == LengthModifier::AsLongDouble ||
        LM.getKind() == LengthModifier::AsQuad) {
      LengthModifier FixedLM(LM);
      FixedLM.setKind(LengthModifier::AsLongLong);
      return FixedLM;
    }
  }

  return std::nullopt;
}

/// Walk the chain of typedefs behind \p QT looking for one of a few
/// well-known names, and set the kind of \p LM to the corresponding length
/// modifier. Returns true if a name was recognised; otherwise returns false
/// and leaves \p LM unchanged.
bool FormatSpecifier::namedTypeToLengthModifier(QualType QT,
                                                LengthModifier &LM) {
  // Each iteration peels one typedef layer until QT is no longer a typedef.
  for (/**/; const auto *TT = QT->getAs<TypedefType>();
       QT = TT->getDecl()->getUnderlyingType()) {
    const TypedefNameDecl *Typedef = TT->getDecl();
    const IdentifierInfo *Identifier = Typedef->getIdentifier();
    if (Identifier->getName() == "size_t") {
      LM.setKind(LengthModifier::AsSizeT);
      return true;
    } else if (Identifier->getName() == "ssize_t") {
      // Not C99, but common in Unix.
      LM.setKind(LengthModifier::AsSizeT);
      return true;
    } else if (Identifier->getName() == "intmax_t") {
      LM.setKind(LengthModifier::AsIntMax);
      return true;
    } else if (Identifier->getName() == "uintmax_t") {
      LM.setKind(LengthModifier::AsIntMax);
      return true;
    } else if (Identifier->getName() == "ptrdiff_t") {
      LM.setKind(LengthModifier::AsPtrDiff);
      return true;
    }
  }
  return false;
}