1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Handling of format strings in printf and friends. The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "FormatStringParsing.h"
15 #include "clang/AST/FormatString.h"
16 #include "clang/AST/OSLog.h"
17 #include "clang/Basic/TargetInfo.h"
18 #include "llvm/Support/Regex.h"
19
20 using clang::analyze_format_string::ArgType;
21 using clang::analyze_format_string::FormatStringHandler;
22 using clang::analyze_format_string::LengthModifier;
23 using clang::analyze_format_string::OptionalAmount;
24 using clang::analyze_format_string::ConversionSpecifier;
25 using clang::analyze_printf::PrintfSpecifier;
26
27 using namespace clang;
28
29 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
30 PrintfSpecifierResult;
31
32 //===----------------------------------------------------------------------===//
33 // Methods for parsing format strings.
34 //===----------------------------------------------------------------------===//
35
36 using analyze_format_string::ParseNonPositionAmount;
37
ParsePrecision(FormatStringHandler & H,PrintfSpecifier & FS,const char * Start,const char * & Beg,const char * E,unsigned * argIndex)38 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
39 const char *Start, const char *&Beg, const char *E,
40 unsigned *argIndex) {
41 if (argIndex) {
42 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
43 } else {
44 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
45 analyze_format_string::PrecisionPos);
46 if (Amt.isInvalid())
47 return true;
48 FS.setPrecision(Amt);
49 }
50 return false;
51 }
52
ParseObjCFlags(FormatStringHandler & H,PrintfSpecifier & FS,const char * FlagBeg,const char * E,bool Warn)53 static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
54 const char *FlagBeg, const char *E, bool Warn) {
55 StringRef Flag(FlagBeg, E - FlagBeg);
56 // Currently there is only one flag.
57 if (Flag == "tt") {
58 FS.setHasObjCTechnicalTerm(FlagBeg);
59 return false;
60 }
61 // Handle either the case of no flag or an invalid flag.
62 if (Warn) {
63 if (Flag == "")
64 H.HandleEmptyObjCModifierFlag(FlagBeg, E - FlagBeg);
65 else
66 H.HandleInvalidObjCModifierFlag(FlagBeg, E - FlagBeg);
67 }
68 return true;
69 }
70
ParsePrintfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target,bool Warn,bool isFreeBSDKPrintf)71 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
72 const char *&Beg,
73 const char *E,
74 unsigned &argIndex,
75 const LangOptions &LO,
76 const TargetInfo &Target,
77 bool Warn,
78 bool isFreeBSDKPrintf) {
79
80 using namespace clang::analyze_format_string;
81 using namespace clang::analyze_printf;
82
83 const char *I = Beg;
84 const char *Start = nullptr;
85 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87 // Look for a '%' character that indicates the start of a format specifier.
88 for ( ; I != E ; ++I) {
89 char c = *I;
90 if (c == '\0') {
91 // Detect spurious null characters, which are likely errors.
92 H.HandleNullChar(I);
93 return true;
94 }
95 if (c == '%') {
96 Start = I++; // Record the start of the format specifier.
97 break;
98 }
99 }
100
101 // No format specifier found?
102 if (!Start)
103 return false;
104
105 if (I == E) {
106 // No more characters left?
107 if (Warn)
108 H.HandleIncompleteSpecifier(Start, E - Start);
109 return true;
110 }
111
112 PrintfSpecifier FS;
113 if (ParseArgPosition(H, FS, Start, I, E))
114 return true;
115
116 if (I == E) {
117 // No more characters left?
118 if (Warn)
119 H.HandleIncompleteSpecifier(Start, E - Start);
120 return true;
121 }
122
123 if (*I == '{') {
124 ++I;
125 unsigned char PrivacyFlags = 0;
126 StringRef MatchedStr;
127
128 do {
129 StringRef Str(I, E - I);
130 std::string Match = "^[[:space:]]*"
131 "(private|public|sensitive|mask\\.[^[:space:],}]*)"
132 "[[:space:]]*(,|})";
133 llvm::Regex R(Match);
134 SmallVector<StringRef, 2> Matches;
135
136 if (R.match(Str, &Matches)) {
137 MatchedStr = Matches[1];
138 I += Matches[0].size();
139
140 // Set the privacy flag if the privacy annotation in the
141 // comma-delimited segment is at least as strict as the privacy
142 // annotations in previous comma-delimited segments.
143 if (MatchedStr.starts_with("mask")) {
144 StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1);
145 unsigned Size = MaskType.size();
146 if (Warn && (Size == 0 || Size > 8))
147 H.handleInvalidMaskType(MaskType);
148 FS.setMaskType(MaskType);
149 } else if (MatchedStr == "sensitive")
150 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
151 else if (PrivacyFlags !=
152 clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
153 MatchedStr == "private")
154 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
155 else if (PrivacyFlags == 0 && MatchedStr == "public")
156 PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
157 } else {
158 size_t CommaOrBracePos =
159 Str.find_if([](char c) { return c == ',' || c == '}'; });
160
161 if (CommaOrBracePos == StringRef::npos) {
162 // Neither a comma nor the closing brace was found.
163 if (Warn)
164 H.HandleIncompleteSpecifier(Start, E - Start);
165 return true;
166 }
167
168 I += CommaOrBracePos + 1;
169 }
170 // Continue until the closing brace is found.
171 } while (*(I - 1) == ',');
172
173 // Set the privacy flag.
174 switch (PrivacyFlags) {
175 case 0:
176 break;
177 case clang::analyze_os_log::OSLogBufferItem::IsPrivate:
178 FS.setIsPrivate(MatchedStr.data());
179 break;
180 case clang::analyze_os_log::OSLogBufferItem::IsPublic:
181 FS.setIsPublic(MatchedStr.data());
182 break;
183 case clang::analyze_os_log::OSLogBufferItem::IsSensitive:
184 FS.setIsSensitive(MatchedStr.data());
185 break;
186 default:
187 llvm_unreachable("Unexpected privacy flag value");
188 }
189 }
190
191 // Look for flags (if any).
192 bool hasMore = true;
193 for ( ; I != E; ++I) {
194 switch (*I) {
195 default: hasMore = false; break;
196 case '\'':
197 // FIXME: POSIX specific. Always accept?
198 FS.setHasThousandsGrouping(I);
199 break;
200 case '-': FS.setIsLeftJustified(I); break;
201 case '+': FS.setHasPlusPrefix(I); break;
202 case ' ': FS.setHasSpacePrefix(I); break;
203 case '#': FS.setHasAlternativeForm(I); break;
204 case '0': FS.setHasLeadingZeros(I); break;
205 }
206 if (!hasMore)
207 break;
208 }
209
210 if (I == E) {
211 // No more characters left?
212 if (Warn)
213 H.HandleIncompleteSpecifier(Start, E - Start);
214 return true;
215 }
216
217 // Look for the field width (if any).
218 if (ParseFieldWidth(H, FS, Start, I, E,
219 FS.usesPositionalArg() ? nullptr : &argIndex))
220 return true;
221
222 if (I == E) {
223 // No more characters left?
224 if (Warn)
225 H.HandleIncompleteSpecifier(Start, E - Start);
226 return true;
227 }
228
229 // Look for the precision (if any).
230 if (*I == '.') {
231 ++I;
232 if (I == E) {
233 if (Warn)
234 H.HandleIncompleteSpecifier(Start, E - Start);
235 return true;
236 }
237
238 if (ParsePrecision(H, FS, Start, I, E,
239 FS.usesPositionalArg() ? nullptr : &argIndex))
240 return true;
241
242 if (I == E) {
243 // No more characters left?
244 if (Warn)
245 H.HandleIncompleteSpecifier(Start, E - Start);
246 return true;
247 }
248 }
249
250 if (ParseVectorModifier(H, FS, I, E, LO))
251 return true;
252
253 // Look for the length modifier.
254 if (ParseLengthModifier(FS, I, E, LO) && I == E) {
255 // No more characters left?
256 if (Warn)
257 H.HandleIncompleteSpecifier(Start, E - Start);
258 return true;
259 }
260
261 // Look for the Objective-C modifier flags, if any.
262 // We parse these here, even if they don't apply to
263 // the conversion specifier, and then emit an error
264 // later if the conversion specifier isn't '@'. This
265 // enables better recovery, and we don't know if
266 // these flags are applicable until later.
267 const char *ObjCModifierFlagsStart = nullptr,
268 *ObjCModifierFlagsEnd = nullptr;
269 if (*I == '[') {
270 ObjCModifierFlagsStart = I;
271 ++I;
272 auto flagStart = I;
273 for (;; ++I) {
274 ObjCModifierFlagsEnd = I;
275 if (I == E) {
276 if (Warn)
277 H.HandleIncompleteSpecifier(Start, E - Start);
278 return true;
279 }
280 // Did we find the closing ']'?
281 if (*I == ']') {
282 if (ParseObjCFlags(H, FS, flagStart, I, Warn))
283 return true;
284 ++I;
285 break;
286 }
287 // There are no separators defined yet for multiple
288 // Objective-C modifier flags. When those are
289 // defined, this is the place to check.
290 }
291 }
292
293 if (*I == '\0') {
294 // Detect spurious null characters, which are likely errors.
295 H.HandleNullChar(I);
296 return true;
297 }
298
299 // Finally, look for the conversion specifier.
300 const char *conversionPosition = I++;
301 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
302 switch (*conversionPosition) {
303 default:
304 break;
305 // C99: 7.19.6.1 (section 8).
306 case '%': k = ConversionSpecifier::PercentArg; break;
307 case 'A': k = ConversionSpecifier::AArg; break;
308 case 'E': k = ConversionSpecifier::EArg; break;
309 case 'F': k = ConversionSpecifier::FArg; break;
310 case 'G': k = ConversionSpecifier::GArg; break;
311 case 'X': k = ConversionSpecifier::XArg; break;
312 case 'a': k = ConversionSpecifier::aArg; break;
313 case 'c': k = ConversionSpecifier::cArg; break;
314 case 'd': k = ConversionSpecifier::dArg; break;
315 case 'e': k = ConversionSpecifier::eArg; break;
316 case 'f': k = ConversionSpecifier::fArg; break;
317 case 'g': k = ConversionSpecifier::gArg; break;
318 case 'i': k = ConversionSpecifier::iArg; break;
319 case 'n':
320 // Not handled, but reserved in OpenCL and FreeBSD kernel.
321 if (!LO.OpenCL && !isFreeBSDKPrintf)
322 k = ConversionSpecifier::nArg;
323 break;
324 case 'o': k = ConversionSpecifier::oArg; break;
325 case 'p': k = ConversionSpecifier::pArg; break;
326 case 's': k = ConversionSpecifier::sArg; break;
327 case 'u': k = ConversionSpecifier::uArg; break;
328 case 'x': k = ConversionSpecifier::xArg; break;
329 // C23.
330 case 'b':
331 if (isFreeBSDKPrintf)
332 k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
333 else
334 k = ConversionSpecifier::bArg;
335 break;
336 case 'B': k = ConversionSpecifier::BArg; break;
337 // POSIX specific.
338 case 'C': k = ConversionSpecifier::CArg; break;
339 case 'S': k = ConversionSpecifier::SArg; break;
340 // Apple extension for os_log
341 case 'P':
342 k = ConversionSpecifier::PArg;
343 break;
344 // Objective-C.
345 case '@': k = ConversionSpecifier::ObjCObjArg; break;
346 // Glibc specific.
347 case 'm': k = ConversionSpecifier::PrintErrno; break;
348 case 'r':
349 if (isFreeBSDKPrintf)
350 k = ConversionSpecifier::FreeBSDrArg; // int
351 else if (LO.FixedPoint)
352 k = ConversionSpecifier::rArg;
353 break;
354 case 'y':
355 if (isFreeBSDKPrintf)
356 k = ConversionSpecifier::FreeBSDyArg; // int
357 break;
358 // Apple-specific.
359 case 'D':
360 if (isFreeBSDKPrintf)
361 k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
362 else if (Target.getTriple().isOSDarwin())
363 k = ConversionSpecifier::DArg;
364 break;
365 case 'O':
366 if (Target.getTriple().isOSDarwin())
367 k = ConversionSpecifier::OArg;
368 break;
369 case 'U':
370 if (Target.getTriple().isOSDarwin())
371 k = ConversionSpecifier::UArg;
372 break;
373 // MS specific.
374 case 'Z':
375 if (Target.getTriple().isOSMSVCRT())
376 k = ConversionSpecifier::ZArg;
377 break;
378 // ISO/IEC TR 18037 (fixed-point) specific.
379 // NOTE: 'r' is handled up above since FreeBSD also supports %r.
380 case 'k':
381 if (LO.FixedPoint)
382 k = ConversionSpecifier::kArg;
383 break;
384 case 'K':
385 if (LO.FixedPoint)
386 k = ConversionSpecifier::KArg;
387 break;
388 case 'R':
389 if (LO.FixedPoint)
390 k = ConversionSpecifier::RArg;
391 break;
392 }
393
394 // Check to see if we used the Objective-C modifier flags with
395 // a conversion specifier other than '@'.
396 if (k != ConversionSpecifier::ObjCObjArg &&
397 k != ConversionSpecifier::InvalidSpecifier &&
398 ObjCModifierFlagsStart) {
399 H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart,
400 ObjCModifierFlagsEnd + 1,
401 conversionPosition);
402 return true;
403 }
404
405 PrintfConversionSpecifier CS(conversionPosition, k);
406 FS.setConversionSpecifier(CS);
407 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
408 FS.setArgIndex(argIndex++);
409 // FreeBSD kernel specific.
410 if (k == ConversionSpecifier::FreeBSDbArg ||
411 k == ConversionSpecifier::FreeBSDDArg)
412 argIndex++;
413
414 if (k == ConversionSpecifier::InvalidSpecifier) {
415 unsigned Len = I - Start;
416 if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
417 CS.setEndScanList(Start + Len);
418 FS.setConversionSpecifier(CS);
419 }
420 // Assume the conversion takes one argument.
421 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
422 }
423 return PrintfSpecifierResult(Start, FS);
424 }
425
ParsePrintfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target,bool isFreeBSDKPrintf)426 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
427 const char *I,
428 const char *E,
429 const LangOptions &LO,
430 const TargetInfo &Target,
431 bool isFreeBSDKPrintf) {
432
433 unsigned argIndex = 0;
434
435 // Keep looking for a format specifier until we have exhausted the string.
436 while (I != E) {
437 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
438 LO, Target, true,
439 isFreeBSDKPrintf);
440 // Did a fail-stop error of any kind occur when parsing the specifier?
441 // If so, don't do any more processing.
442 if (FSR.shouldStop())
443 return true;
444 // Did we exhaust the string or encounter an error that
445 // we can recover from?
446 if (!FSR.hasValue())
447 continue;
448 // We have a format specifier. Pass it to the callback.
449 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
450 I - FSR.getStart(), Target))
451 return true;
452 }
453 assert(I == E && "Format string not exhausted");
454 return false;
455 }
456
ParseFormatStringHasSArg(const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)457 bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
458 const char *E,
459 const LangOptions &LO,
460 const TargetInfo &Target) {
461
462 unsigned argIndex = 0;
463
464 // Keep looking for a %s format specifier until we have exhausted the string.
465 FormatStringHandler H;
466 while (I != E) {
467 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
468 LO, Target, false,
469 false);
470 // Did a fail-stop error of any kind occur when parsing the specifier?
471 // If so, don't do any more processing.
472 if (FSR.shouldStop())
473 return false;
474 // Did we exhaust the string or encounter an error that
475 // we can recover from?
476 if (!FSR.hasValue())
477 continue;
478 const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
479 // Return true if this a %s format specifier.
480 if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg)
481 return true;
482 }
483 return false;
484 }
485
parseFormatStringHasFormattingSpecifiers(const char * Begin,const char * End,const LangOptions & LO,const TargetInfo & Target)486 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
487 const char *Begin, const char *End, const LangOptions &LO,
488 const TargetInfo &Target) {
489 unsigned ArgIndex = 0;
490 // Keep looking for a formatting specifier until we have exhausted the string.
491 FormatStringHandler H;
492 while (Begin != End) {
493 const PrintfSpecifierResult &FSR =
494 ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false);
495 if (FSR.shouldStop())
496 break;
497 if (FSR.hasValue())
498 return true;
499 }
500 return false;
501 }
502
503 //===----------------------------------------------------------------------===//
504 // Methods on PrintfSpecifier.
505 //===----------------------------------------------------------------------===//
506
getScalarArgType(ASTContext & Ctx,bool IsObjCLiteral) const507 ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
508 bool IsObjCLiteral) const {
509 if (CS.getKind() == ConversionSpecifier::cArg)
510 switch (LM.getKind()) {
511 case LengthModifier::None:
512 return Ctx.IntTy;
513 case LengthModifier::AsLong:
514 case LengthModifier::AsWide:
515 return ArgType(ArgType::WIntTy, "wint_t");
516 case LengthModifier::AsShort:
517 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
518 return Ctx.IntTy;
519 [[fallthrough]];
520 default:
521 return ArgType::Invalid();
522 }
523
524 if (CS.isIntArg())
525 switch (LM.getKind()) {
526 case LengthModifier::AsLongDouble:
527 // GNU extension.
528 return Ctx.LongLongTy;
529 case LengthModifier::None:
530 case LengthModifier::AsShortLong:
531 return Ctx.IntTy;
532 case LengthModifier::AsInt32:
533 return ArgType(Ctx.IntTy, "__int32");
534 case LengthModifier::AsChar:
535 return ArgType::AnyCharTy;
536 case LengthModifier::AsShort: return Ctx.ShortTy;
537 case LengthModifier::AsLong: return Ctx.LongTy;
538 case LengthModifier::AsLongLong:
539 case LengthModifier::AsQuad:
540 return Ctx.LongLongTy;
541 case LengthModifier::AsInt64:
542 return ArgType(Ctx.LongLongTy, "__int64");
543 case LengthModifier::AsIntMax:
544 return ArgType(Ctx.getIntMaxType(), "intmax_t");
545 case LengthModifier::AsSizeT:
546 return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
547 case LengthModifier::AsInt3264:
548 return Ctx.getTargetInfo().getTriple().isArch64Bit()
549 ? ArgType(Ctx.LongLongTy, "__int64")
550 : ArgType(Ctx.IntTy, "__int32");
551 case LengthModifier::AsPtrDiff:
552 return ArgType::makePtrdiffT(
553 ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
554 case LengthModifier::AsAllocate:
555 case LengthModifier::AsMAllocate:
556 case LengthModifier::AsWide:
557 return ArgType::Invalid();
558 }
559
560 if (CS.isUIntArg())
561 switch (LM.getKind()) {
562 case LengthModifier::AsLongDouble:
563 // GNU extension.
564 return Ctx.UnsignedLongLongTy;
565 case LengthModifier::None:
566 case LengthModifier::AsShortLong:
567 return Ctx.UnsignedIntTy;
568 case LengthModifier::AsInt32:
569 return ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
570 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
571 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
572 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
573 case LengthModifier::AsLongLong:
574 case LengthModifier::AsQuad:
575 return Ctx.UnsignedLongLongTy;
576 case LengthModifier::AsInt64:
577 return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64");
578 case LengthModifier::AsIntMax:
579 return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
580 case LengthModifier::AsSizeT:
581 return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t"));
582 case LengthModifier::AsInt3264:
583 return Ctx.getTargetInfo().getTriple().isArch64Bit()
584 ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")
585 : ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
586 case LengthModifier::AsPtrDiff:
587 return ArgType::makePtrdiffT(
588 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
589 case LengthModifier::AsAllocate:
590 case LengthModifier::AsMAllocate:
591 case LengthModifier::AsWide:
592 return ArgType::Invalid();
593 }
594
595 if (CS.isDoubleArg()) {
596 if (!VectorNumElts.isInvalid()) {
597 switch (LM.getKind()) {
598 case LengthModifier::AsShort:
599 return Ctx.HalfTy;
600 case LengthModifier::AsShortLong:
601 return Ctx.FloatTy;
602 case LengthModifier::AsLong:
603 default:
604 return Ctx.DoubleTy;
605 }
606 }
607
608 if (LM.getKind() == LengthModifier::AsLongDouble)
609 return Ctx.LongDoubleTy;
610 return Ctx.DoubleTy;
611 }
612
613 if (CS.getKind() == ConversionSpecifier::nArg) {
614 switch (LM.getKind()) {
615 case LengthModifier::None:
616 return ArgType::PtrTo(Ctx.IntTy);
617 case LengthModifier::AsChar:
618 return ArgType::PtrTo(Ctx.SignedCharTy);
619 case LengthModifier::AsShort:
620 return ArgType::PtrTo(Ctx.ShortTy);
621 case LengthModifier::AsLong:
622 return ArgType::PtrTo(Ctx.LongTy);
623 case LengthModifier::AsLongLong:
624 case LengthModifier::AsQuad:
625 return ArgType::PtrTo(Ctx.LongLongTy);
626 case LengthModifier::AsIntMax:
627 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
628 case LengthModifier::AsSizeT:
629 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
630 case LengthModifier::AsPtrDiff:
631 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
632 case LengthModifier::AsLongDouble:
633 return ArgType(); // FIXME: Is this a known extension?
634 case LengthModifier::AsAllocate:
635 case LengthModifier::AsMAllocate:
636 case LengthModifier::AsInt32:
637 case LengthModifier::AsInt3264:
638 case LengthModifier::AsInt64:
639 case LengthModifier::AsWide:
640 return ArgType::Invalid();
641 case LengthModifier::AsShortLong:
642 llvm_unreachable("only used for OpenCL which doesn not handle nArg");
643 }
644 }
645
646 if (CS.isFixedPointArg() && !Ctx.getLangOpts().FixedPoint)
647 return ArgType::Invalid();
648
649 switch (CS.getKind()) {
650 case ConversionSpecifier::sArg:
651 if (LM.getKind() == LengthModifier::AsWideChar) {
652 if (IsObjCLiteral)
653 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
654 "const unichar *");
655 return ArgType(ArgType::WCStrTy, "wchar_t *");
656 }
657 if (LM.getKind() == LengthModifier::AsWide)
658 return ArgType(ArgType::WCStrTy, "wchar_t *");
659 return ArgType::CStrTy;
660 case ConversionSpecifier::SArg:
661 if (IsObjCLiteral)
662 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
663 "const unichar *");
664 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
665 LM.getKind() == LengthModifier::AsShort)
666 return ArgType::CStrTy;
667 return ArgType(ArgType::WCStrTy, "wchar_t *");
668 case ConversionSpecifier::CArg:
669 if (IsObjCLiteral)
670 return ArgType(Ctx.UnsignedShortTy, "unichar");
671 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
672 LM.getKind() == LengthModifier::AsShort)
673 return Ctx.IntTy;
674 return ArgType(Ctx.WideCharTy, "wchar_t");
675 case ConversionSpecifier::pArg:
676 case ConversionSpecifier::PArg:
677 return ArgType::CPointerTy;
678 case ConversionSpecifier::ObjCObjArg:
679 return ArgType::ObjCPointerTy;
680 case ConversionSpecifier::kArg:
681 switch (LM.getKind()) {
682 case LengthModifier::None:
683 return Ctx.AccumTy;
684 case LengthModifier::AsShort:
685 return Ctx.ShortAccumTy;
686 case LengthModifier::AsLong:
687 return Ctx.LongAccumTy;
688 default:
689 return ArgType::Invalid();
690 }
691 case ConversionSpecifier::KArg:
692 switch (LM.getKind()) {
693 case LengthModifier::None:
694 return Ctx.UnsignedAccumTy;
695 case LengthModifier::AsShort:
696 return Ctx.UnsignedShortAccumTy;
697 case LengthModifier::AsLong:
698 return Ctx.UnsignedLongAccumTy;
699 default:
700 return ArgType::Invalid();
701 }
702 case ConversionSpecifier::rArg:
703 switch (LM.getKind()) {
704 case LengthModifier::None:
705 return Ctx.FractTy;
706 case LengthModifier::AsShort:
707 return Ctx.ShortFractTy;
708 case LengthModifier::AsLong:
709 return Ctx.LongFractTy;
710 default:
711 return ArgType::Invalid();
712 }
713 case ConversionSpecifier::RArg:
714 switch (LM.getKind()) {
715 case LengthModifier::None:
716 return Ctx.UnsignedFractTy;
717 case LengthModifier::AsShort:
718 return Ctx.UnsignedShortFractTy;
719 case LengthModifier::AsLong:
720 return Ctx.UnsignedLongFractTy;
721 default:
722 return ArgType::Invalid();
723 }
724 default:
725 break;
726 }
727
728 // FIXME: Handle other cases.
729 return ArgType();
730 }
731
732
getArgType(ASTContext & Ctx,bool IsObjCLiteral) const733 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
734 bool IsObjCLiteral) const {
735 const PrintfConversionSpecifier &CS = getConversionSpecifier();
736
737 if (!CS.consumesDataArgument())
738 return ArgType::Invalid();
739
740 ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral);
741 if (!ScalarTy.isValid() || VectorNumElts.isInvalid())
742 return ScalarTy;
743
744 return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount());
745 }
746
fixType(QualType QT,const LangOptions & LangOpt,ASTContext & Ctx,bool IsObjCLiteral)747 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
748 ASTContext &Ctx, bool IsObjCLiteral) {
749 // %n is different from other conversion specifiers; don't try to fix it.
750 if (CS.getKind() == ConversionSpecifier::nArg)
751 return false;
752
753 // Handle Objective-C objects first. Note that while the '%@' specifier will
754 // not warn for structure pointer or void pointer arguments (because that's
755 // how CoreFoundation objects are implemented), we only show a fixit for '%@'
756 // if we know it's an object (block, id, class, or __attribute__((NSObject))).
757 if (QT->isObjCRetainableType()) {
758 if (!IsObjCLiteral)
759 return false;
760
761 CS.setKind(ConversionSpecifier::ObjCObjArg);
762
763 // Disable irrelevant flags
764 HasThousandsGrouping = false;
765 HasPlusPrefix = false;
766 HasSpacePrefix = false;
767 HasAlternativeForm = false;
768 HasLeadingZeroes = false;
769 Precision.setHowSpecified(OptionalAmount::NotSpecified);
770 LM.setKind(LengthModifier::None);
771
772 return true;
773 }
774
775 // Handle strings next (char *, wchar_t *)
776 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
777 CS.setKind(ConversionSpecifier::sArg);
778
779 // Disable irrelevant flags
780 HasAlternativeForm = false;
781 HasLeadingZeroes = false;
782
783 // Set the long length modifier for wide characters
784 if (QT->getPointeeType()->isWideCharType())
785 LM.setKind(LengthModifier::AsWideChar);
786 else
787 LM.setKind(LengthModifier::None);
788
789 return true;
790 }
791
792 // If it's an enum, get its underlying type.
793 if (const EnumType *ETy = QT->getAs<EnumType>())
794 QT = ETy->getDecl()->getIntegerType();
795
796 const BuiltinType *BT = QT->getAs<BuiltinType>();
797 if (!BT) {
798 const VectorType *VT = QT->getAs<VectorType>();
799 if (VT) {
800 QT = VT->getElementType();
801 BT = QT->getAs<BuiltinType>();
802 VectorNumElts = OptionalAmount(VT->getNumElements());
803 }
804 }
805
806 // We can only work with builtin types.
807 if (!BT)
808 return false;
809
810 // Set length modifier
811 switch (BT->getKind()) {
812 case BuiltinType::Bool:
813 case BuiltinType::WChar_U:
814 case BuiltinType::WChar_S:
815 case BuiltinType::Char8: // FIXME: Treat like 'char'?
816 case BuiltinType::Char16:
817 case BuiltinType::Char32:
818 case BuiltinType::UInt128:
819 case BuiltinType::Int128:
820 case BuiltinType::Half:
821 case BuiltinType::BFloat16:
822 case BuiltinType::Float16:
823 case BuiltinType::Float128:
824 case BuiltinType::Ibm128:
825 case BuiltinType::ShortAccum:
826 case BuiltinType::Accum:
827 case BuiltinType::LongAccum:
828 case BuiltinType::UShortAccum:
829 case BuiltinType::UAccum:
830 case BuiltinType::ULongAccum:
831 case BuiltinType::ShortFract:
832 case BuiltinType::Fract:
833 case BuiltinType::LongFract:
834 case BuiltinType::UShortFract:
835 case BuiltinType::UFract:
836 case BuiltinType::ULongFract:
837 case BuiltinType::SatShortAccum:
838 case BuiltinType::SatAccum:
839 case BuiltinType::SatLongAccum:
840 case BuiltinType::SatUShortAccum:
841 case BuiltinType::SatUAccum:
842 case BuiltinType::SatULongAccum:
843 case BuiltinType::SatShortFract:
844 case BuiltinType::SatFract:
845 case BuiltinType::SatLongFract:
846 case BuiltinType::SatUShortFract:
847 case BuiltinType::SatUFract:
848 case BuiltinType::SatULongFract:
849 // Various types which are non-trivial to correct.
850 return false;
851
852 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
853 case BuiltinType::Id:
854 #include "clang/Basic/OpenCLImageTypes.def"
855 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
856 case BuiltinType::Id:
857 #include "clang/Basic/OpenCLExtensionTypes.def"
858 #define SVE_TYPE(Name, Id, SingletonId) \
859 case BuiltinType::Id:
860 #include "clang/Basic/AArch64SVEACLETypes.def"
861 #define PPC_VECTOR_TYPE(Name, Id, Size) \
862 case BuiltinType::Id:
863 #include "clang/Basic/PPCTypes.def"
864 #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
865 #include "clang/Basic/RISCVVTypes.def"
866 #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
867 #include "clang/Basic/WebAssemblyReferenceTypes.def"
868 #define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
869 #include "clang/Basic/AMDGPUTypes.def"
870 #define SIGNED_TYPE(Id, SingletonId)
871 #define UNSIGNED_TYPE(Id, SingletonId)
872 #define FLOATING_TYPE(Id, SingletonId)
873 #define BUILTIN_TYPE(Id, SingletonId) \
874 case BuiltinType::Id:
875 #include "clang/AST/BuiltinTypes.def"
876 // Misc other stuff which doesn't make sense here.
877 return false;
878
879 case BuiltinType::UInt:
880 case BuiltinType::Int:
881 case BuiltinType::Float:
882 LM.setKind(VectorNumElts.isInvalid() ?
883 LengthModifier::None : LengthModifier::AsShortLong);
884 break;
885 case BuiltinType::Double:
886 LM.setKind(VectorNumElts.isInvalid() ?
887 LengthModifier::None : LengthModifier::AsLong);
888 break;
889 case BuiltinType::Char_U:
890 case BuiltinType::UChar:
891 case BuiltinType::Char_S:
892 case BuiltinType::SChar:
893 LM.setKind(LengthModifier::AsChar);
894 break;
895
896 case BuiltinType::Short:
897 case BuiltinType::UShort:
898 LM.setKind(LengthModifier::AsShort);
899 break;
900
901 case BuiltinType::Long:
902 case BuiltinType::ULong:
903 LM.setKind(LengthModifier::AsLong);
904 break;
905
906 case BuiltinType::LongLong:
907 case BuiltinType::ULongLong:
908 LM.setKind(LengthModifier::AsLongLong);
909 break;
910
911 case BuiltinType::LongDouble:
912 LM.setKind(LengthModifier::AsLongDouble);
913 break;
914 }
915
916 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
917 if (LangOpt.C99 || LangOpt.CPlusPlus11)
918 namedTypeToLengthModifier(QT, LM);
919
920 // If fixing the length modifier was enough, we might be done.
921 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
922 // If we're going to offer a fix anyway, make sure the sign matches.
923 switch (CS.getKind()) {
924 case ConversionSpecifier::uArg:
925 case ConversionSpecifier::UArg:
926 if (QT->isSignedIntegerType())
927 CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
928 break;
929 case ConversionSpecifier::dArg:
930 case ConversionSpecifier::DArg:
931 case ConversionSpecifier::iArg:
932 if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
933 CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
934 break;
935 default:
936 // Other specifiers do not have signed/unsigned variants.
937 break;
938 }
939
940 const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
941 if (ATR.isValid() && ATR.matchesType(Ctx, QT))
942 return true;
943 }
944
945 // Set conversion specifier and disable any flags which do not apply to it.
946 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
947 if (!QT->getAs<TypedefType>() && QT->isCharType()) {
948 CS.setKind(ConversionSpecifier::cArg);
949 LM.setKind(LengthModifier::None);
950 Precision.setHowSpecified(OptionalAmount::NotSpecified);
951 HasAlternativeForm = false;
952 HasLeadingZeroes = false;
953 HasPlusPrefix = false;
954 }
955 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
956 else if (QT->isRealFloatingType()) {
957 CS.setKind(ConversionSpecifier::fArg);
958 } else if (QT->isSignedIntegerType()) {
959 CS.setKind(ConversionSpecifier::dArg);
960 HasAlternativeForm = false;
961 } else if (QT->isUnsignedIntegerType()) {
962 CS.setKind(ConversionSpecifier::uArg);
963 HasAlternativeForm = false;
964 HasPlusPrefix = false;
965 } else {
966 llvm_unreachable("Unexpected type");
967 }
968
969 return true;
970 }
971
toString(raw_ostream & os) const972 void PrintfSpecifier::toString(raw_ostream &os) const {
973 // Whilst some features have no defined order, we are using the order
974 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
975 os << "%";
976
977 // Positional args
978 if (usesPositionalArg()) {
979 os << getPositionalArgIndex() << "$";
980 }
981
982 // Conversion flags
983 if (IsLeftJustified) os << "-";
984 if (HasPlusPrefix) os << "+";
985 if (HasSpacePrefix) os << " ";
986 if (HasAlternativeForm) os << "#";
987 if (HasLeadingZeroes) os << "0";
988
989 // Minimum field width
990 FieldWidth.toString(os);
991 // Precision
992 Precision.toString(os);
993
994 // Vector modifier
995 if (!VectorNumElts.isInvalid())
996 os << 'v' << VectorNumElts.getConstantAmount();
997
998 // Length modifier
999 os << LM.toString();
1000 // Conversion specifier
1001 os << CS.toString();
1002 }
1003
hasValidPlusPrefix() const1004 bool PrintfSpecifier::hasValidPlusPrefix() const {
1005 if (!HasPlusPrefix)
1006 return true;
1007
1008 // The plus prefix only makes sense for signed conversions
1009 switch (CS.getKind()) {
1010 case ConversionSpecifier::dArg:
1011 case ConversionSpecifier::DArg:
1012 case ConversionSpecifier::iArg:
1013 case ConversionSpecifier::fArg:
1014 case ConversionSpecifier::FArg:
1015 case ConversionSpecifier::eArg:
1016 case ConversionSpecifier::EArg:
1017 case ConversionSpecifier::gArg:
1018 case ConversionSpecifier::GArg:
1019 case ConversionSpecifier::aArg:
1020 case ConversionSpecifier::AArg:
1021 case ConversionSpecifier::FreeBSDrArg:
1022 case ConversionSpecifier::FreeBSDyArg:
1023 case ConversionSpecifier::rArg:
1024 case ConversionSpecifier::kArg:
1025 return true;
1026
1027 default:
1028 return false;
1029 }
1030 }
1031
hasValidAlternativeForm() const1032 bool PrintfSpecifier::hasValidAlternativeForm() const {
1033 if (!HasAlternativeForm)
1034 return true;
1035
1036 // Alternate form flag only valid with the bBoxXaAeEfFgGrRkK conversions
1037 switch (CS.getKind()) {
1038 case ConversionSpecifier::bArg:
1039 case ConversionSpecifier::BArg:
1040 case ConversionSpecifier::oArg:
1041 case ConversionSpecifier::OArg:
1042 case ConversionSpecifier::xArg:
1043 case ConversionSpecifier::XArg:
1044 case ConversionSpecifier::aArg:
1045 case ConversionSpecifier::AArg:
1046 case ConversionSpecifier::eArg:
1047 case ConversionSpecifier::EArg:
1048 case ConversionSpecifier::fArg:
1049 case ConversionSpecifier::FArg:
1050 case ConversionSpecifier::gArg:
1051 case ConversionSpecifier::GArg:
1052 case ConversionSpecifier::FreeBSDrArg:
1053 case ConversionSpecifier::FreeBSDyArg:
1054 case ConversionSpecifier::rArg:
1055 case ConversionSpecifier::RArg:
1056 case ConversionSpecifier::kArg:
1057 case ConversionSpecifier::KArg:
1058 return true;
1059
1060 default:
1061 return false;
1062 }
1063 }
1064
hasValidLeadingZeros() const1065 bool PrintfSpecifier::hasValidLeadingZeros() const {
1066 if (!HasLeadingZeroes)
1067 return true;
1068
1069 // Leading zeroes flag only valid with the bBdiouxXaAeEfFgGrRkK conversions
1070 switch (CS.getKind()) {
1071 case ConversionSpecifier::bArg:
1072 case ConversionSpecifier::BArg:
1073 case ConversionSpecifier::dArg:
1074 case ConversionSpecifier::DArg:
1075 case ConversionSpecifier::iArg:
1076 case ConversionSpecifier::oArg:
1077 case ConversionSpecifier::OArg:
1078 case ConversionSpecifier::uArg:
1079 case ConversionSpecifier::UArg:
1080 case ConversionSpecifier::xArg:
1081 case ConversionSpecifier::XArg:
1082 case ConversionSpecifier::aArg:
1083 case ConversionSpecifier::AArg:
1084 case ConversionSpecifier::eArg:
1085 case ConversionSpecifier::EArg:
1086 case ConversionSpecifier::fArg:
1087 case ConversionSpecifier::FArg:
1088 case ConversionSpecifier::gArg:
1089 case ConversionSpecifier::GArg:
1090 case ConversionSpecifier::FreeBSDrArg:
1091 case ConversionSpecifier::FreeBSDyArg:
1092 case ConversionSpecifier::rArg:
1093 case ConversionSpecifier::RArg:
1094 case ConversionSpecifier::kArg:
1095 case ConversionSpecifier::KArg:
1096 return true;
1097
1098 default:
1099 return false;
1100 }
1101 }
1102
hasValidSpacePrefix() const1103 bool PrintfSpecifier::hasValidSpacePrefix() const {
1104 if (!HasSpacePrefix)
1105 return true;
1106
1107 // The space prefix only makes sense for signed conversions
1108 switch (CS.getKind()) {
1109 case ConversionSpecifier::dArg:
1110 case ConversionSpecifier::DArg:
1111 case ConversionSpecifier::iArg:
1112 case ConversionSpecifier::fArg:
1113 case ConversionSpecifier::FArg:
1114 case ConversionSpecifier::eArg:
1115 case ConversionSpecifier::EArg:
1116 case ConversionSpecifier::gArg:
1117 case ConversionSpecifier::GArg:
1118 case ConversionSpecifier::aArg:
1119 case ConversionSpecifier::AArg:
1120 case ConversionSpecifier::FreeBSDrArg:
1121 case ConversionSpecifier::FreeBSDyArg:
1122 case ConversionSpecifier::rArg:
1123 case ConversionSpecifier::kArg:
1124 return true;
1125
1126 default:
1127 return false;
1128 }
1129 }
1130
hasValidLeftJustified() const1131 bool PrintfSpecifier::hasValidLeftJustified() const {
1132 if (!IsLeftJustified)
1133 return true;
1134
1135 // The left justified flag is valid for all conversions except n
1136 switch (CS.getKind()) {
1137 case ConversionSpecifier::nArg:
1138 return false;
1139
1140 default:
1141 return true;
1142 }
1143 }
1144
hasValidThousandsGroupingPrefix() const1145 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
1146 if (!HasThousandsGrouping)
1147 return true;
1148
1149 switch (CS.getKind()) {
1150 case ConversionSpecifier::dArg:
1151 case ConversionSpecifier::DArg:
1152 case ConversionSpecifier::iArg:
1153 case ConversionSpecifier::uArg:
1154 case ConversionSpecifier::UArg:
1155 case ConversionSpecifier::fArg:
1156 case ConversionSpecifier::FArg:
1157 case ConversionSpecifier::gArg:
1158 case ConversionSpecifier::GArg:
1159 return true;
1160 default:
1161 return false;
1162 }
1163 }
1164
hasValidPrecision() const1165 bool PrintfSpecifier::hasValidPrecision() const {
1166 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1167 return true;
1168
1169 // Precision is only valid with the bBdiouxXaAeEfFgGsPrRkK conversions
1170 switch (CS.getKind()) {
1171 case ConversionSpecifier::bArg:
1172 case ConversionSpecifier::BArg:
1173 case ConversionSpecifier::dArg:
1174 case ConversionSpecifier::DArg:
1175 case ConversionSpecifier::iArg:
1176 case ConversionSpecifier::oArg:
1177 case ConversionSpecifier::OArg:
1178 case ConversionSpecifier::uArg:
1179 case ConversionSpecifier::UArg:
1180 case ConversionSpecifier::xArg:
1181 case ConversionSpecifier::XArg:
1182 case ConversionSpecifier::aArg:
1183 case ConversionSpecifier::AArg:
1184 case ConversionSpecifier::eArg:
1185 case ConversionSpecifier::EArg:
1186 case ConversionSpecifier::fArg:
1187 case ConversionSpecifier::FArg:
1188 case ConversionSpecifier::gArg:
1189 case ConversionSpecifier::GArg:
1190 case ConversionSpecifier::sArg:
1191 case ConversionSpecifier::FreeBSDrArg:
1192 case ConversionSpecifier::FreeBSDyArg:
1193 case ConversionSpecifier::PArg:
1194 case ConversionSpecifier::rArg:
1195 case ConversionSpecifier::RArg:
1196 case ConversionSpecifier::kArg:
1197 case ConversionSpecifier::KArg:
1198 return true;
1199
1200 default:
1201 return false;
1202 }
1203 }
hasValidFieldWidth() const1204 bool PrintfSpecifier::hasValidFieldWidth() const {
1205 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1206 return true;
1207
1208 // The field width is valid for all conversions except n
1209 switch (CS.getKind()) {
1210 case ConversionSpecifier::nArg:
1211 return false;
1212
1213 default:
1214 return true;
1215 }
1216 }
1217