xref: /freebsd/contrib/llvm-project/clang/lib/AST/PrintfFormatString.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "FormatStringParsing.h"
15 #include "clang/AST/FormatString.h"
16 #include "clang/AST/OSLog.h"
17 #include "clang/Basic/TargetInfo.h"
18 #include "llvm/Support/Regex.h"
19 
20 using clang::analyze_format_string::ArgType;
21 using clang::analyze_format_string::FormatStringHandler;
22 using clang::analyze_format_string::LengthModifier;
23 using clang::analyze_format_string::OptionalAmount;
24 using clang::analyze_format_string::ConversionSpecifier;
25 using clang::analyze_printf::PrintfSpecifier;
26 
27 using namespace clang;
28 
29 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
30         PrintfSpecifierResult;
31 
32 //===----------------------------------------------------------------------===//
33 // Methods for parsing format strings.
34 //===----------------------------------------------------------------------===//
35 
36 using analyze_format_string::ParseNonPositionAmount;
37 
ParsePrecision(FormatStringHandler & H,PrintfSpecifier & FS,const char * Start,const char * & Beg,const char * E,unsigned * argIndex)38 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
39                            const char *Start, const char *&Beg, const char *E,
40                            unsigned *argIndex) {
41   if (argIndex) {
42     FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
43   } else {
44     const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
45                                            analyze_format_string::PrecisionPos);
46     if (Amt.isInvalid())
47       return true;
48     FS.setPrecision(Amt);
49   }
50   return false;
51 }
52 
ParseObjCFlags(FormatStringHandler & H,PrintfSpecifier & FS,const char * FlagBeg,const char * E,bool Warn)53 static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
54                            const char *FlagBeg, const char *E, bool Warn) {
55    StringRef Flag(FlagBeg, E - FlagBeg);
56    // Currently there is only one flag.
57    if (Flag == "tt") {
58      FS.setHasObjCTechnicalTerm(FlagBeg);
59      return false;
60    }
61    // Handle either the case of no flag or an invalid flag.
62    if (Warn) {
63      if (Flag == "")
64        H.HandleEmptyObjCModifierFlag(FlagBeg, E  - FlagBeg);
65      else
66        H.HandleInvalidObjCModifierFlag(FlagBeg, E  - FlagBeg);
67    }
68    return true;
69 }
70 
ParsePrintfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target,bool Warn,bool isFreeBSDKPrintf)71 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
72                                                   const char *&Beg,
73                                                   const char *E,
74                                                   unsigned &argIndex,
75                                                   const LangOptions &LO,
76                                                   const TargetInfo &Target,
77                                                   bool Warn,
78                                                   bool isFreeBSDKPrintf) {
79 
80   using namespace clang::analyze_format_string;
81   using namespace clang::analyze_printf;
82 
83   const char *I = Beg;
84   const char *Start = nullptr;
85   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86 
87   // Look for a '%' character that indicates the start of a format specifier.
88   for ( ; I != E ; ++I) {
89     char c = *I;
90     if (c == '\0') {
91       // Detect spurious null characters, which are likely errors.
92       H.HandleNullChar(I);
93       return true;
94     }
95     if (c == '%') {
96       Start = I++;  // Record the start of the format specifier.
97       break;
98     }
99   }
100 
101   // No format specifier found?
102   if (!Start)
103     return false;
104 
105   if (I == E) {
106     // No more characters left?
107     if (Warn)
108       H.HandleIncompleteSpecifier(Start, E - Start);
109     return true;
110   }
111 
112   PrintfSpecifier FS;
113   if (ParseArgPosition(H, FS, Start, I, E))
114     return true;
115 
116   if (I == E) {
117     // No more characters left?
118     if (Warn)
119       H.HandleIncompleteSpecifier(Start, E - Start);
120     return true;
121   }
122 
123   if (*I == '{') {
124     ++I;
125     unsigned char PrivacyFlags = 0;
126     StringRef MatchedStr;
127 
128     do {
129       StringRef Str(I, E - I);
130       std::string Match = "^[[:space:]]*"
131                           "(private|public|sensitive|mask\\.[^[:space:],}]*)"
132                           "[[:space:]]*(,|})";
133       llvm::Regex R(Match);
134       SmallVector<StringRef, 2> Matches;
135 
136       if (R.match(Str, &Matches)) {
137         MatchedStr = Matches[1];
138         I += Matches[0].size();
139 
140         // Set the privacy flag if the privacy annotation in the
141         // comma-delimited segment is at least as strict as the privacy
142         // annotations in previous comma-delimited segments.
143         if (MatchedStr.starts_with("mask")) {
144           StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1);
145           unsigned Size = MaskType.size();
146           if (Warn && (Size == 0 || Size > 8))
147             H.handleInvalidMaskType(MaskType);
148           FS.setMaskType(MaskType);
149         } else if (MatchedStr == "sensitive")
150           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
151         else if (PrivacyFlags !=
152                      clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
153                  MatchedStr == "private")
154           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
155         else if (PrivacyFlags == 0 && MatchedStr == "public")
156           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
157       } else {
158         size_t CommaOrBracePos =
159             Str.find_if([](char c) { return c == ',' || c == '}'; });
160 
161         if (CommaOrBracePos == StringRef::npos) {
162           // Neither a comma nor the closing brace was found.
163           if (Warn)
164             H.HandleIncompleteSpecifier(Start, E - Start);
165           return true;
166         }
167 
168         I += CommaOrBracePos + 1;
169       }
170       // Continue until the closing brace is found.
171     } while (*(I - 1) == ',');
172 
173     // Set the privacy flag.
174     switch (PrivacyFlags) {
175     case 0:
176       break;
177     case clang::analyze_os_log::OSLogBufferItem::IsPrivate:
178       FS.setIsPrivate(MatchedStr.data());
179       break;
180     case clang::analyze_os_log::OSLogBufferItem::IsPublic:
181       FS.setIsPublic(MatchedStr.data());
182       break;
183     case clang::analyze_os_log::OSLogBufferItem::IsSensitive:
184       FS.setIsSensitive(MatchedStr.data());
185       break;
186     default:
187       llvm_unreachable("Unexpected privacy flag value");
188     }
189   }
190 
191   // Look for flags (if any).
192   bool hasMore = true;
193   for ( ; I != E; ++I) {
194     switch (*I) {
195       default: hasMore = false; break;
196       case '\'':
197         // FIXME: POSIX specific.  Always accept?
198         FS.setHasThousandsGrouping(I);
199         break;
200       case '-': FS.setIsLeftJustified(I); break;
201       case '+': FS.setHasPlusPrefix(I); break;
202       case ' ': FS.setHasSpacePrefix(I); break;
203       case '#': FS.setHasAlternativeForm(I); break;
204       case '0': FS.setHasLeadingZeros(I); break;
205     }
206     if (!hasMore)
207       break;
208   }
209 
210   if (I == E) {
211     // No more characters left?
212     if (Warn)
213       H.HandleIncompleteSpecifier(Start, E - Start);
214     return true;
215   }
216 
217   // Look for the field width (if any).
218   if (ParseFieldWidth(H, FS, Start, I, E,
219                       FS.usesPositionalArg() ? nullptr : &argIndex))
220     return true;
221 
222   if (I == E) {
223     // No more characters left?
224     if (Warn)
225       H.HandleIncompleteSpecifier(Start, E - Start);
226     return true;
227   }
228 
229   // Look for the precision (if any).
230   if (*I == '.') {
231     ++I;
232     if (I == E) {
233       if (Warn)
234         H.HandleIncompleteSpecifier(Start, E - Start);
235       return true;
236     }
237 
238     if (ParsePrecision(H, FS, Start, I, E,
239                        FS.usesPositionalArg() ? nullptr : &argIndex))
240       return true;
241 
242     if (I == E) {
243       // No more characters left?
244       if (Warn)
245         H.HandleIncompleteSpecifier(Start, E - Start);
246       return true;
247     }
248   }
249 
250   if (ParseVectorModifier(H, FS, I, E, LO))
251     return true;
252 
253   // Look for the length modifier.
254   if (ParseLengthModifier(FS, I, E, LO) && I == E) {
255     // No more characters left?
256     if (Warn)
257       H.HandleIncompleteSpecifier(Start, E - Start);
258     return true;
259   }
260 
261   // Look for the Objective-C modifier flags, if any.
262   // We parse these here, even if they don't apply to
263   // the conversion specifier, and then emit an error
264   // later if the conversion specifier isn't '@'.  This
265   // enables better recovery, and we don't know if
266   // these flags are applicable until later.
267   const char *ObjCModifierFlagsStart = nullptr,
268              *ObjCModifierFlagsEnd = nullptr;
269   if (*I == '[') {
270     ObjCModifierFlagsStart = I;
271     ++I;
272     auto flagStart = I;
273     for (;; ++I) {
274       ObjCModifierFlagsEnd = I;
275       if (I == E) {
276         if (Warn)
277           H.HandleIncompleteSpecifier(Start, E - Start);
278         return true;
279       }
280       // Did we find the closing ']'?
281       if (*I == ']') {
282         if (ParseObjCFlags(H, FS, flagStart, I, Warn))
283           return true;
284         ++I;
285         break;
286       }
287       // There are no separators defined yet for multiple
288       // Objective-C modifier flags.  When those are
289       // defined, this is the place to check.
290     }
291   }
292 
293   if (*I == '\0') {
294     // Detect spurious null characters, which are likely errors.
295     H.HandleNullChar(I);
296     return true;
297   }
298 
299   // Finally, look for the conversion specifier.
300   const char *conversionPosition = I++;
301   ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
302   switch (*conversionPosition) {
303     default:
304       break;
305     // C99: 7.19.6.1 (section 8).
306     case '%': k = ConversionSpecifier::PercentArg;   break;
307     case 'A': k = ConversionSpecifier::AArg; break;
308     case 'E': k = ConversionSpecifier::EArg; break;
309     case 'F': k = ConversionSpecifier::FArg; break;
310     case 'G': k = ConversionSpecifier::GArg; break;
311     case 'X': k = ConversionSpecifier::XArg; break;
312     case 'a': k = ConversionSpecifier::aArg; break;
313     case 'c': k = ConversionSpecifier::cArg; break;
314     case 'd': k = ConversionSpecifier::dArg; break;
315     case 'e': k = ConversionSpecifier::eArg; break;
316     case 'f': k = ConversionSpecifier::fArg; break;
317     case 'g': k = ConversionSpecifier::gArg; break;
318     case 'i': k = ConversionSpecifier::iArg; break;
319     case 'n':
320       // Not handled, but reserved in OpenCL and FreeBSD kernel.
321       if (!LO.OpenCL && !isFreeBSDKPrintf)
322         k = ConversionSpecifier::nArg;
323       break;
324     case 'o': k = ConversionSpecifier::oArg; break;
325     case 'p': k = ConversionSpecifier::pArg; break;
326     case 's': k = ConversionSpecifier::sArg; break;
327     case 'u': k = ConversionSpecifier::uArg; break;
328     case 'x': k = ConversionSpecifier::xArg; break;
329     // C23.
330     case 'b':
331       if (isFreeBSDKPrintf)
332         k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
333       else
334         k = ConversionSpecifier::bArg;
335       break;
336     case 'B': k = ConversionSpecifier::BArg; break;
337     // POSIX specific.
338     case 'C': k = ConversionSpecifier::CArg; break;
339     case 'S': k = ConversionSpecifier::SArg; break;
340     // Apple extension for os_log
341     case 'P':
342       k = ConversionSpecifier::PArg;
343       break;
344     // Objective-C.
345     case '@': k = ConversionSpecifier::ObjCObjArg; break;
346     // Glibc specific.
347     case 'm': k = ConversionSpecifier::PrintErrno; break;
348     case 'r':
349       if (isFreeBSDKPrintf)
350         k = ConversionSpecifier::FreeBSDrArg; // int
351       else if (LO.FixedPoint)
352         k = ConversionSpecifier::rArg;
353       break;
354     case 'y':
355       if (isFreeBSDKPrintf)
356         k = ConversionSpecifier::FreeBSDyArg; // int
357       break;
358     // Apple-specific.
359     case 'D':
360       if (isFreeBSDKPrintf)
361         k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
362       else if (Target.getTriple().isOSDarwin())
363         k = ConversionSpecifier::DArg;
364       break;
365     case 'O':
366       if (Target.getTriple().isOSDarwin())
367         k = ConversionSpecifier::OArg;
368       break;
369     case 'U':
370       if (Target.getTriple().isOSDarwin())
371         k = ConversionSpecifier::UArg;
372       break;
373     // MS specific.
374     case 'Z':
375       if (Target.getTriple().isOSMSVCRT())
376         k = ConversionSpecifier::ZArg;
377       break;
378     // ISO/IEC TR 18037 (fixed-point) specific.
379     // NOTE: 'r' is handled up above since FreeBSD also supports %r.
380     case 'k':
381       if (LO.FixedPoint)
382         k = ConversionSpecifier::kArg;
383       break;
384     case 'K':
385       if (LO.FixedPoint)
386         k = ConversionSpecifier::KArg;
387       break;
388     case 'R':
389       if (LO.FixedPoint)
390         k = ConversionSpecifier::RArg;
391       break;
392   }
393 
394   // Check to see if we used the Objective-C modifier flags with
395   // a conversion specifier other than '@'.
396   if (k != ConversionSpecifier::ObjCObjArg &&
397       k != ConversionSpecifier::InvalidSpecifier &&
398       ObjCModifierFlagsStart) {
399     H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart,
400                                            ObjCModifierFlagsEnd + 1,
401                                            conversionPosition);
402     return true;
403   }
404 
405   PrintfConversionSpecifier CS(conversionPosition, k);
406   FS.setConversionSpecifier(CS);
407   if (CS.consumesDataArgument() && !FS.usesPositionalArg())
408     FS.setArgIndex(argIndex++);
409   // FreeBSD kernel specific.
410   if (k == ConversionSpecifier::FreeBSDbArg ||
411       k == ConversionSpecifier::FreeBSDDArg)
412     argIndex++;
413 
414   if (k == ConversionSpecifier::InvalidSpecifier) {
415     unsigned Len = I - Start;
416     if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
417       CS.setEndScanList(Start + Len);
418       FS.setConversionSpecifier(CS);
419     }
420     // Assume the conversion takes one argument.
421     return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
422   }
423   return PrintfSpecifierResult(Start, FS);
424 }
425 
ParsePrintfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target,bool isFreeBSDKPrintf)426 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
427                                                      const char *I,
428                                                      const char *E,
429                                                      const LangOptions &LO,
430                                                      const TargetInfo &Target,
431                                                      bool isFreeBSDKPrintf) {
432 
433   unsigned argIndex = 0;
434 
435   // Keep looking for a format specifier until we have exhausted the string.
436   while (I != E) {
437     const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
438                                                             LO, Target, true,
439                                                             isFreeBSDKPrintf);
440     // Did a fail-stop error of any kind occur when parsing the specifier?
441     // If so, don't do any more processing.
442     if (FSR.shouldStop())
443       return true;
444     // Did we exhaust the string or encounter an error that
445     // we can recover from?
446     if (!FSR.hasValue())
447       continue;
448     // We have a format specifier.  Pass it to the callback.
449     if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
450                                  I - FSR.getStart(), Target))
451       return true;
452   }
453   assert(I == E && "Format string not exhausted");
454   return false;
455 }
456 
ParseFormatStringHasSArg(const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)457 bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
458                                                             const char *E,
459                                                             const LangOptions &LO,
460                                                             const TargetInfo &Target) {
461 
462   unsigned argIndex = 0;
463 
464   // Keep looking for a %s format specifier until we have exhausted the string.
465   FormatStringHandler H;
466   while (I != E) {
467     const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
468                                                             LO, Target, false,
469                                                             false);
470     // Did a fail-stop error of any kind occur when parsing the specifier?
471     // If so, don't do any more processing.
472     if (FSR.shouldStop())
473       return false;
474     // Did we exhaust the string or encounter an error that
475     // we can recover from?
476     if (!FSR.hasValue())
477       continue;
478     const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
479     // Return true if this a %s format specifier.
480     if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg)
481       return true;
482   }
483   return false;
484 }
485 
parseFormatStringHasFormattingSpecifiers(const char * Begin,const char * End,const LangOptions & LO,const TargetInfo & Target)486 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
487     const char *Begin, const char *End, const LangOptions &LO,
488     const TargetInfo &Target) {
489   unsigned ArgIndex = 0;
490   // Keep looking for a formatting specifier until we have exhausted the string.
491   FormatStringHandler H;
492   while (Begin != End) {
493     const PrintfSpecifierResult &FSR =
494         ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false);
495     if (FSR.shouldStop())
496       break;
497     if (FSR.hasValue())
498       return true;
499   }
500   return false;
501 }
502 
503 //===----------------------------------------------------------------------===//
504 // Methods on PrintfSpecifier.
505 //===----------------------------------------------------------------------===//
506 
getScalarArgType(ASTContext & Ctx,bool IsObjCLiteral) const507 ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
508                                           bool IsObjCLiteral) const {
509   if (CS.getKind() == ConversionSpecifier::cArg)
510     switch (LM.getKind()) {
511       case LengthModifier::None:
512         return Ctx.IntTy;
513       case LengthModifier::AsLong:
514       case LengthModifier::AsWide:
515         return ArgType(ArgType::WIntTy, "wint_t");
516       case LengthModifier::AsShort:
517         if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
518           return Ctx.IntTy;
519         [[fallthrough]];
520       default:
521         return ArgType::Invalid();
522     }
523 
524   if (CS.isIntArg())
525     switch (LM.getKind()) {
526       case LengthModifier::AsLongDouble:
527         // GNU extension.
528         return Ctx.LongLongTy;
529       case LengthModifier::None:
530       case LengthModifier::AsShortLong:
531         return Ctx.IntTy;
532       case LengthModifier::AsInt32:
533         return ArgType(Ctx.IntTy, "__int32");
534       case LengthModifier::AsChar:
535         return ArgType::AnyCharTy;
536       case LengthModifier::AsShort: return Ctx.ShortTy;
537       case LengthModifier::AsLong: return Ctx.LongTy;
538       case LengthModifier::AsLongLong:
539       case LengthModifier::AsQuad:
540         return Ctx.LongLongTy;
541       case LengthModifier::AsInt64:
542         return ArgType(Ctx.LongLongTy, "__int64");
543       case LengthModifier::AsIntMax:
544         return ArgType(Ctx.getIntMaxType(), "intmax_t");
545       case LengthModifier::AsSizeT:
546         return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
547       case LengthModifier::AsInt3264:
548         return Ctx.getTargetInfo().getTriple().isArch64Bit()
549                    ? ArgType(Ctx.LongLongTy, "__int64")
550                    : ArgType(Ctx.IntTy, "__int32");
551       case LengthModifier::AsPtrDiff:
552         return ArgType::makePtrdiffT(
553             ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
554       case LengthModifier::AsAllocate:
555       case LengthModifier::AsMAllocate:
556       case LengthModifier::AsWide:
557         return ArgType::Invalid();
558     }
559 
560   if (CS.isUIntArg())
561     switch (LM.getKind()) {
562       case LengthModifier::AsLongDouble:
563         // GNU extension.
564         return Ctx.UnsignedLongLongTy;
565       case LengthModifier::None:
566       case LengthModifier::AsShortLong:
567         return Ctx.UnsignedIntTy;
568       case LengthModifier::AsInt32:
569         return ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
570       case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
571       case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
572       case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
573       case LengthModifier::AsLongLong:
574       case LengthModifier::AsQuad:
575         return Ctx.UnsignedLongLongTy;
576       case LengthModifier::AsInt64:
577         return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64");
578       case LengthModifier::AsIntMax:
579         return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
580       case LengthModifier::AsSizeT:
581         return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t"));
582       case LengthModifier::AsInt3264:
583         return Ctx.getTargetInfo().getTriple().isArch64Bit()
584                    ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")
585                    : ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
586       case LengthModifier::AsPtrDiff:
587         return ArgType::makePtrdiffT(
588             ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
589       case LengthModifier::AsAllocate:
590       case LengthModifier::AsMAllocate:
591       case LengthModifier::AsWide:
592         return ArgType::Invalid();
593     }
594 
595   if (CS.isDoubleArg()) {
596     if (!VectorNumElts.isInvalid()) {
597       switch (LM.getKind()) {
598       case LengthModifier::AsShort:
599         return Ctx.HalfTy;
600       case LengthModifier::AsShortLong:
601         return Ctx.FloatTy;
602       case LengthModifier::AsLong:
603       default:
604         return Ctx.DoubleTy;
605       }
606     }
607 
608     if (LM.getKind() == LengthModifier::AsLongDouble)
609       return Ctx.LongDoubleTy;
610     return Ctx.DoubleTy;
611   }
612 
613   if (CS.getKind() == ConversionSpecifier::nArg) {
614     switch (LM.getKind()) {
615       case LengthModifier::None:
616         return ArgType::PtrTo(Ctx.IntTy);
617       case LengthModifier::AsChar:
618         return ArgType::PtrTo(Ctx.SignedCharTy);
619       case LengthModifier::AsShort:
620         return ArgType::PtrTo(Ctx.ShortTy);
621       case LengthModifier::AsLong:
622         return ArgType::PtrTo(Ctx.LongTy);
623       case LengthModifier::AsLongLong:
624       case LengthModifier::AsQuad:
625         return ArgType::PtrTo(Ctx.LongLongTy);
626       case LengthModifier::AsIntMax:
627         return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
628       case LengthModifier::AsSizeT:
629         return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
630       case LengthModifier::AsPtrDiff:
631         return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
632       case LengthModifier::AsLongDouble:
633         return ArgType(); // FIXME: Is this a known extension?
634       case LengthModifier::AsAllocate:
635       case LengthModifier::AsMAllocate:
636       case LengthModifier::AsInt32:
637       case LengthModifier::AsInt3264:
638       case LengthModifier::AsInt64:
639       case LengthModifier::AsWide:
640         return ArgType::Invalid();
641       case LengthModifier::AsShortLong:
642         llvm_unreachable("only used for OpenCL which doesn not handle nArg");
643     }
644   }
645 
646   if (CS.isFixedPointArg() && !Ctx.getLangOpts().FixedPoint)
647     return ArgType::Invalid();
648 
649   switch (CS.getKind()) {
650     case ConversionSpecifier::sArg:
651       if (LM.getKind() == LengthModifier::AsWideChar) {
652         if (IsObjCLiteral)
653           return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
654                          "const unichar *");
655         return ArgType(ArgType::WCStrTy, "wchar_t *");
656       }
657       if (LM.getKind() == LengthModifier::AsWide)
658         return ArgType(ArgType::WCStrTy, "wchar_t *");
659       return ArgType::CStrTy;
660     case ConversionSpecifier::SArg:
661       if (IsObjCLiteral)
662         return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
663                        "const unichar *");
664       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
665           LM.getKind() == LengthModifier::AsShort)
666         return ArgType::CStrTy;
667       return ArgType(ArgType::WCStrTy, "wchar_t *");
668     case ConversionSpecifier::CArg:
669       if (IsObjCLiteral)
670         return ArgType(Ctx.UnsignedShortTy, "unichar");
671       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
672           LM.getKind() == LengthModifier::AsShort)
673         return Ctx.IntTy;
674       return ArgType(Ctx.WideCharTy, "wchar_t");
675     case ConversionSpecifier::pArg:
676     case ConversionSpecifier::PArg:
677       return ArgType::CPointerTy;
678     case ConversionSpecifier::ObjCObjArg:
679       return ArgType::ObjCPointerTy;
680     case ConversionSpecifier::kArg:
681       switch (LM.getKind()) {
682       case LengthModifier::None:
683         return Ctx.AccumTy;
684       case LengthModifier::AsShort:
685         return Ctx.ShortAccumTy;
686       case LengthModifier::AsLong:
687         return Ctx.LongAccumTy;
688       default:
689         return ArgType::Invalid();
690       }
691     case ConversionSpecifier::KArg:
692       switch (LM.getKind()) {
693       case LengthModifier::None:
694         return Ctx.UnsignedAccumTy;
695       case LengthModifier::AsShort:
696         return Ctx.UnsignedShortAccumTy;
697       case LengthModifier::AsLong:
698         return Ctx.UnsignedLongAccumTy;
699       default:
700         return ArgType::Invalid();
701       }
702     case ConversionSpecifier::rArg:
703       switch (LM.getKind()) {
704       case LengthModifier::None:
705         return Ctx.FractTy;
706       case LengthModifier::AsShort:
707         return Ctx.ShortFractTy;
708       case LengthModifier::AsLong:
709         return Ctx.LongFractTy;
710       default:
711         return ArgType::Invalid();
712       }
713     case ConversionSpecifier::RArg:
714       switch (LM.getKind()) {
715       case LengthModifier::None:
716         return Ctx.UnsignedFractTy;
717       case LengthModifier::AsShort:
718         return Ctx.UnsignedShortFractTy;
719       case LengthModifier::AsLong:
720         return Ctx.UnsignedLongFractTy;
721       default:
722         return ArgType::Invalid();
723       }
724     default:
725       break;
726   }
727 
728   // FIXME: Handle other cases.
729   return ArgType();
730 }
731 
732 
getArgType(ASTContext & Ctx,bool IsObjCLiteral) const733 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
734                                     bool IsObjCLiteral) const {
735   const PrintfConversionSpecifier &CS = getConversionSpecifier();
736 
737   if (!CS.consumesDataArgument())
738     return ArgType::Invalid();
739 
740   ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral);
741   if (!ScalarTy.isValid() || VectorNumElts.isInvalid())
742     return ScalarTy;
743 
744   return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount());
745 }
746 
fixType(QualType QT,const LangOptions & LangOpt,ASTContext & Ctx,bool IsObjCLiteral)747 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
748                               ASTContext &Ctx, bool IsObjCLiteral) {
749   // %n is different from other conversion specifiers; don't try to fix it.
750   if (CS.getKind() == ConversionSpecifier::nArg)
751     return false;
752 
753   // Handle Objective-C objects first. Note that while the '%@' specifier will
754   // not warn for structure pointer or void pointer arguments (because that's
755   // how CoreFoundation objects are implemented), we only show a fixit for '%@'
756   // if we know it's an object (block, id, class, or __attribute__((NSObject))).
757   if (QT->isObjCRetainableType()) {
758     if (!IsObjCLiteral)
759       return false;
760 
761     CS.setKind(ConversionSpecifier::ObjCObjArg);
762 
763     // Disable irrelevant flags
764     HasThousandsGrouping = false;
765     HasPlusPrefix = false;
766     HasSpacePrefix = false;
767     HasAlternativeForm = false;
768     HasLeadingZeroes = false;
769     Precision.setHowSpecified(OptionalAmount::NotSpecified);
770     LM.setKind(LengthModifier::None);
771 
772     return true;
773   }
774 
775   // Handle strings next (char *, wchar_t *)
776   if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
777     CS.setKind(ConversionSpecifier::sArg);
778 
779     // Disable irrelevant flags
780     HasAlternativeForm = false;
781     HasLeadingZeroes = false;
782 
783     // Set the long length modifier for wide characters
784     if (QT->getPointeeType()->isWideCharType())
785       LM.setKind(LengthModifier::AsWideChar);
786     else
787       LM.setKind(LengthModifier::None);
788 
789     return true;
790   }
791 
792   // If it's an enum, get its underlying type.
793   if (const EnumType *ETy = QT->getAs<EnumType>())
794     QT = ETy->getDecl()->getIntegerType();
795 
796   const BuiltinType *BT = QT->getAs<BuiltinType>();
797   if (!BT) {
798     const VectorType *VT = QT->getAs<VectorType>();
799     if (VT) {
800       QT = VT->getElementType();
801       BT = QT->getAs<BuiltinType>();
802       VectorNumElts = OptionalAmount(VT->getNumElements());
803     }
804   }
805 
806   // We can only work with builtin types.
807   if (!BT)
808     return false;
809 
810   // Set length modifier
811   switch (BT->getKind()) {
812   case BuiltinType::Bool:
813   case BuiltinType::WChar_U:
814   case BuiltinType::WChar_S:
815   case BuiltinType::Char8: // FIXME: Treat like 'char'?
816   case BuiltinType::Char16:
817   case BuiltinType::Char32:
818   case BuiltinType::UInt128:
819   case BuiltinType::Int128:
820   case BuiltinType::Half:
821   case BuiltinType::BFloat16:
822   case BuiltinType::Float16:
823   case BuiltinType::Float128:
824   case BuiltinType::Ibm128:
825   case BuiltinType::ShortAccum:
826   case BuiltinType::Accum:
827   case BuiltinType::LongAccum:
828   case BuiltinType::UShortAccum:
829   case BuiltinType::UAccum:
830   case BuiltinType::ULongAccum:
831   case BuiltinType::ShortFract:
832   case BuiltinType::Fract:
833   case BuiltinType::LongFract:
834   case BuiltinType::UShortFract:
835   case BuiltinType::UFract:
836   case BuiltinType::ULongFract:
837   case BuiltinType::SatShortAccum:
838   case BuiltinType::SatAccum:
839   case BuiltinType::SatLongAccum:
840   case BuiltinType::SatUShortAccum:
841   case BuiltinType::SatUAccum:
842   case BuiltinType::SatULongAccum:
843   case BuiltinType::SatShortFract:
844   case BuiltinType::SatFract:
845   case BuiltinType::SatLongFract:
846   case BuiltinType::SatUShortFract:
847   case BuiltinType::SatUFract:
848   case BuiltinType::SatULongFract:
849     // Various types which are non-trivial to correct.
850     return false;
851 
852 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
853   case BuiltinType::Id:
854 #include "clang/Basic/OpenCLImageTypes.def"
855 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
856   case BuiltinType::Id:
857 #include "clang/Basic/OpenCLExtensionTypes.def"
858 #define SVE_TYPE(Name, Id, SingletonId) \
859   case BuiltinType::Id:
860 #include "clang/Basic/AArch64SVEACLETypes.def"
861 #define PPC_VECTOR_TYPE(Name, Id, Size) \
862   case BuiltinType::Id:
863 #include "clang/Basic/PPCTypes.def"
864 #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
865 #include "clang/Basic/RISCVVTypes.def"
866 #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
867 #include "clang/Basic/WebAssemblyReferenceTypes.def"
868 #define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
869 #include "clang/Basic/AMDGPUTypes.def"
870 #define SIGNED_TYPE(Id, SingletonId)
871 #define UNSIGNED_TYPE(Id, SingletonId)
872 #define FLOATING_TYPE(Id, SingletonId)
873 #define BUILTIN_TYPE(Id, SingletonId) \
874   case BuiltinType::Id:
875 #include "clang/AST/BuiltinTypes.def"
876     // Misc other stuff which doesn't make sense here.
877     return false;
878 
879   case BuiltinType::UInt:
880   case BuiltinType::Int:
881   case BuiltinType::Float:
882     LM.setKind(VectorNumElts.isInvalid() ?
883                LengthModifier::None : LengthModifier::AsShortLong);
884     break;
885   case BuiltinType::Double:
886     LM.setKind(VectorNumElts.isInvalid() ?
887                LengthModifier::None : LengthModifier::AsLong);
888     break;
889   case BuiltinType::Char_U:
890   case BuiltinType::UChar:
891   case BuiltinType::Char_S:
892   case BuiltinType::SChar:
893     LM.setKind(LengthModifier::AsChar);
894     break;
895 
896   case BuiltinType::Short:
897   case BuiltinType::UShort:
898     LM.setKind(LengthModifier::AsShort);
899     break;
900 
901   case BuiltinType::Long:
902   case BuiltinType::ULong:
903     LM.setKind(LengthModifier::AsLong);
904     break;
905 
906   case BuiltinType::LongLong:
907   case BuiltinType::ULongLong:
908     LM.setKind(LengthModifier::AsLongLong);
909     break;
910 
911   case BuiltinType::LongDouble:
912     LM.setKind(LengthModifier::AsLongDouble);
913     break;
914   }
915 
916   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
917   if (LangOpt.C99 || LangOpt.CPlusPlus11)
918     namedTypeToLengthModifier(QT, LM);
919 
920   // If fixing the length modifier was enough, we might be done.
921   if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
922     // If we're going to offer a fix anyway, make sure the sign matches.
923     switch (CS.getKind()) {
924     case ConversionSpecifier::uArg:
925     case ConversionSpecifier::UArg:
926       if (QT->isSignedIntegerType())
927         CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
928       break;
929     case ConversionSpecifier::dArg:
930     case ConversionSpecifier::DArg:
931     case ConversionSpecifier::iArg:
932       if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
933         CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
934       break;
935     default:
936       // Other specifiers do not have signed/unsigned variants.
937       break;
938     }
939 
940     const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
941     if (ATR.isValid() && ATR.matchesType(Ctx, QT))
942       return true;
943   }
944 
945   // Set conversion specifier and disable any flags which do not apply to it.
946   // Let typedefs to char fall through to int, as %c is silly for uint8_t.
947   if (!QT->getAs<TypedefType>() && QT->isCharType()) {
948     CS.setKind(ConversionSpecifier::cArg);
949     LM.setKind(LengthModifier::None);
950     Precision.setHowSpecified(OptionalAmount::NotSpecified);
951     HasAlternativeForm = false;
952     HasLeadingZeroes = false;
953     HasPlusPrefix = false;
954   }
955   // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
956   else if (QT->isRealFloatingType()) {
957     CS.setKind(ConversionSpecifier::fArg);
958   } else if (QT->isSignedIntegerType()) {
959     CS.setKind(ConversionSpecifier::dArg);
960     HasAlternativeForm = false;
961   } else if (QT->isUnsignedIntegerType()) {
962     CS.setKind(ConversionSpecifier::uArg);
963     HasAlternativeForm = false;
964     HasPlusPrefix = false;
965   } else {
966     llvm_unreachable("Unexpected type");
967   }
968 
969   return true;
970 }
971 
toString(raw_ostream & os) const972 void PrintfSpecifier::toString(raw_ostream &os) const {
973   // Whilst some features have no defined order, we are using the order
974   // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
975   os << "%";
976 
977   // Positional args
978   if (usesPositionalArg()) {
979     os << getPositionalArgIndex() << "$";
980   }
981 
982   // Conversion flags
983   if (IsLeftJustified)    os << "-";
984   if (HasPlusPrefix)      os << "+";
985   if (HasSpacePrefix)     os << " ";
986   if (HasAlternativeForm) os << "#";
987   if (HasLeadingZeroes)   os << "0";
988 
989   // Minimum field width
990   FieldWidth.toString(os);
991   // Precision
992   Precision.toString(os);
993 
994   // Vector modifier
995   if (!VectorNumElts.isInvalid())
996     os << 'v' << VectorNumElts.getConstantAmount();
997 
998   // Length modifier
999   os << LM.toString();
1000   // Conversion specifier
1001   os << CS.toString();
1002 }
1003 
hasValidPlusPrefix() const1004 bool PrintfSpecifier::hasValidPlusPrefix() const {
1005   if (!HasPlusPrefix)
1006     return true;
1007 
1008   // The plus prefix only makes sense for signed conversions
1009   switch (CS.getKind()) {
1010   case ConversionSpecifier::dArg:
1011   case ConversionSpecifier::DArg:
1012   case ConversionSpecifier::iArg:
1013   case ConversionSpecifier::fArg:
1014   case ConversionSpecifier::FArg:
1015   case ConversionSpecifier::eArg:
1016   case ConversionSpecifier::EArg:
1017   case ConversionSpecifier::gArg:
1018   case ConversionSpecifier::GArg:
1019   case ConversionSpecifier::aArg:
1020   case ConversionSpecifier::AArg:
1021   case ConversionSpecifier::FreeBSDrArg:
1022   case ConversionSpecifier::FreeBSDyArg:
1023   case ConversionSpecifier::rArg:
1024   case ConversionSpecifier::kArg:
1025     return true;
1026 
1027   default:
1028     return false;
1029   }
1030 }
1031 
hasValidAlternativeForm() const1032 bool PrintfSpecifier::hasValidAlternativeForm() const {
1033   if (!HasAlternativeForm)
1034     return true;
1035 
1036   // Alternate form flag only valid with the bBoxXaAeEfFgGrRkK conversions
1037   switch (CS.getKind()) {
1038   case ConversionSpecifier::bArg:
1039   case ConversionSpecifier::BArg:
1040   case ConversionSpecifier::oArg:
1041   case ConversionSpecifier::OArg:
1042   case ConversionSpecifier::xArg:
1043   case ConversionSpecifier::XArg:
1044   case ConversionSpecifier::aArg:
1045   case ConversionSpecifier::AArg:
1046   case ConversionSpecifier::eArg:
1047   case ConversionSpecifier::EArg:
1048   case ConversionSpecifier::fArg:
1049   case ConversionSpecifier::FArg:
1050   case ConversionSpecifier::gArg:
1051   case ConversionSpecifier::GArg:
1052   case ConversionSpecifier::FreeBSDrArg:
1053   case ConversionSpecifier::FreeBSDyArg:
1054   case ConversionSpecifier::rArg:
1055   case ConversionSpecifier::RArg:
1056   case ConversionSpecifier::kArg:
1057   case ConversionSpecifier::KArg:
1058     return true;
1059 
1060   default:
1061     return false;
1062   }
1063 }
1064 
hasValidLeadingZeros() const1065 bool PrintfSpecifier::hasValidLeadingZeros() const {
1066   if (!HasLeadingZeroes)
1067     return true;
1068 
1069   // Leading zeroes flag only valid with the bBdiouxXaAeEfFgGrRkK conversions
1070   switch (CS.getKind()) {
1071   case ConversionSpecifier::bArg:
1072   case ConversionSpecifier::BArg:
1073   case ConversionSpecifier::dArg:
1074   case ConversionSpecifier::DArg:
1075   case ConversionSpecifier::iArg:
1076   case ConversionSpecifier::oArg:
1077   case ConversionSpecifier::OArg:
1078   case ConversionSpecifier::uArg:
1079   case ConversionSpecifier::UArg:
1080   case ConversionSpecifier::xArg:
1081   case ConversionSpecifier::XArg:
1082   case ConversionSpecifier::aArg:
1083   case ConversionSpecifier::AArg:
1084   case ConversionSpecifier::eArg:
1085   case ConversionSpecifier::EArg:
1086   case ConversionSpecifier::fArg:
1087   case ConversionSpecifier::FArg:
1088   case ConversionSpecifier::gArg:
1089   case ConversionSpecifier::GArg:
1090   case ConversionSpecifier::FreeBSDrArg:
1091   case ConversionSpecifier::FreeBSDyArg:
1092   case ConversionSpecifier::rArg:
1093   case ConversionSpecifier::RArg:
1094   case ConversionSpecifier::kArg:
1095   case ConversionSpecifier::KArg:
1096     return true;
1097 
1098   default:
1099     return false;
1100   }
1101 }
1102 
hasValidSpacePrefix() const1103 bool PrintfSpecifier::hasValidSpacePrefix() const {
1104   if (!HasSpacePrefix)
1105     return true;
1106 
1107   // The space prefix only makes sense for signed conversions
1108   switch (CS.getKind()) {
1109   case ConversionSpecifier::dArg:
1110   case ConversionSpecifier::DArg:
1111   case ConversionSpecifier::iArg:
1112   case ConversionSpecifier::fArg:
1113   case ConversionSpecifier::FArg:
1114   case ConversionSpecifier::eArg:
1115   case ConversionSpecifier::EArg:
1116   case ConversionSpecifier::gArg:
1117   case ConversionSpecifier::GArg:
1118   case ConversionSpecifier::aArg:
1119   case ConversionSpecifier::AArg:
1120   case ConversionSpecifier::FreeBSDrArg:
1121   case ConversionSpecifier::FreeBSDyArg:
1122   case ConversionSpecifier::rArg:
1123   case ConversionSpecifier::kArg:
1124     return true;
1125 
1126   default:
1127     return false;
1128   }
1129 }
1130 
hasValidLeftJustified() const1131 bool PrintfSpecifier::hasValidLeftJustified() const {
1132   if (!IsLeftJustified)
1133     return true;
1134 
1135   // The left justified flag is valid for all conversions except n
1136   switch (CS.getKind()) {
1137   case ConversionSpecifier::nArg:
1138     return false;
1139 
1140   default:
1141     return true;
1142   }
1143 }
1144 
hasValidThousandsGroupingPrefix() const1145 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
1146   if (!HasThousandsGrouping)
1147     return true;
1148 
1149   switch (CS.getKind()) {
1150     case ConversionSpecifier::dArg:
1151     case ConversionSpecifier::DArg:
1152     case ConversionSpecifier::iArg:
1153     case ConversionSpecifier::uArg:
1154     case ConversionSpecifier::UArg:
1155     case ConversionSpecifier::fArg:
1156     case ConversionSpecifier::FArg:
1157     case ConversionSpecifier::gArg:
1158     case ConversionSpecifier::GArg:
1159       return true;
1160     default:
1161       return false;
1162   }
1163 }
1164 
hasValidPrecision() const1165 bool PrintfSpecifier::hasValidPrecision() const {
1166   if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1167     return true;
1168 
1169   // Precision is only valid with the bBdiouxXaAeEfFgGsPrRkK conversions
1170   switch (CS.getKind()) {
1171   case ConversionSpecifier::bArg:
1172   case ConversionSpecifier::BArg:
1173   case ConversionSpecifier::dArg:
1174   case ConversionSpecifier::DArg:
1175   case ConversionSpecifier::iArg:
1176   case ConversionSpecifier::oArg:
1177   case ConversionSpecifier::OArg:
1178   case ConversionSpecifier::uArg:
1179   case ConversionSpecifier::UArg:
1180   case ConversionSpecifier::xArg:
1181   case ConversionSpecifier::XArg:
1182   case ConversionSpecifier::aArg:
1183   case ConversionSpecifier::AArg:
1184   case ConversionSpecifier::eArg:
1185   case ConversionSpecifier::EArg:
1186   case ConversionSpecifier::fArg:
1187   case ConversionSpecifier::FArg:
1188   case ConversionSpecifier::gArg:
1189   case ConversionSpecifier::GArg:
1190   case ConversionSpecifier::sArg:
1191   case ConversionSpecifier::FreeBSDrArg:
1192   case ConversionSpecifier::FreeBSDyArg:
1193   case ConversionSpecifier::PArg:
1194   case ConversionSpecifier::rArg:
1195   case ConversionSpecifier::RArg:
1196   case ConversionSpecifier::kArg:
1197   case ConversionSpecifier::KArg:
1198     return true;
1199 
1200   default:
1201     return false;
1202   }
1203 }
hasValidFieldWidth() const1204 bool PrintfSpecifier::hasValidFieldWidth() const {
1205   if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1206       return true;
1207 
1208   // The field width is valid for all conversions except n
1209   switch (CS.getKind()) {
1210   case ConversionSpecifier::nArg:
1211     return false;
1212 
1213   default:
1214     return true;
1215   }
1216 }
1217