xref: /freebsd/contrib/llvm-project/clang/lib/AST/ScanfFormatString.cpp (revision f5b7695d2d5abd735064870ad43f4b9c723940c1)
1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handling of format strings in scanf and friends.  The structure of format
10 // strings for fscanf() is described in C99 7.19.6.2.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/FormatString.h"
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/TargetInfo.h"
17 
18 using clang::analyze_format_string::ArgType;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_scanf::ScanfConversionSpecifier;
24 using clang::analyze_scanf::ScanfSpecifier;
25 using clang::UpdateOnReturn;
26 using namespace clang;
27 
28 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29         ScanfSpecifierResult;
30 
31 static bool ParseScanList(FormatStringHandler &H,
32                           ScanfConversionSpecifier &CS,
33                           const char *&Beg, const char *E) {
34   const char *I = Beg;
35   const char *start = I - 1;
36   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37 
38   // No more characters?
39   if (I == E) {
40     H.HandleIncompleteScanList(start, I);
41     return true;
42   }
43 
44   // Special case: ']' is the first character.
45   if (*I == ']') {
46     if (++I == E) {
47       H.HandleIncompleteScanList(start, I - 1);
48       return true;
49     }
50   }
51 
52   // Special case: "^]" are the first characters.
53   if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54     I += 2;
55     if (I == E) {
56       H.HandleIncompleteScanList(start, I - 1);
57       return true;
58     }
59   }
60 
61   // Look for a ']' character which denotes the end of the scan list.
62   while (*I != ']') {
63     if (++I == E) {
64       H.HandleIncompleteScanList(start, I - 1);
65       return true;
66     }
67   }
68 
69   CS.setEndScanList(I);
70   return false;
71 }
72 
73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74 // We can possibly refactor.
75 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
76                                                 const char *&Beg,
77                                                 const char *E,
78                                                 unsigned &argIndex,
79                                                 const LangOptions &LO,
80                                                 const TargetInfo &Target) {
81   using namespace clang::analyze_format_string;
82   using namespace clang::analyze_scanf;
83   const char *I = Beg;
84   const char *Start = nullptr;
85   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86 
87     // Look for a '%' character that indicates the start of a format specifier.
88   for ( ; I != E ; ++I) {
89     char c = *I;
90     if (c == '\0') {
91         // Detect spurious null characters, which are likely errors.
92       H.HandleNullChar(I);
93       return true;
94     }
95     if (c == '%') {
96       Start = I++;  // Record the start of the format specifier.
97       break;
98     }
99   }
100 
101     // No format specifier found?
102   if (!Start)
103     return false;
104 
105   if (I == E) {
106       // No more characters left?
107     H.HandleIncompleteSpecifier(Start, E - Start);
108     return true;
109   }
110 
111   ScanfSpecifier FS;
112   if (ParseArgPosition(H, FS, Start, I, E))
113     return true;
114 
115   if (I == E) {
116       // No more characters left?
117     H.HandleIncompleteSpecifier(Start, E - Start);
118     return true;
119   }
120 
121   // Look for '*' flag if it is present.
122   if (*I == '*') {
123     FS.setSuppressAssignment(I);
124     if (++I == E) {
125       H.HandleIncompleteSpecifier(Start, E - Start);
126       return true;
127     }
128   }
129 
130   // Look for the field width (if any).  Unlike printf, this is either
131   // a fixed integer or isn't present.
132   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
133   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
134     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135     FS.setFieldWidth(Amt);
136 
137     if (I == E) {
138       // No more characters left?
139       H.HandleIncompleteSpecifier(Start, E - Start);
140       return true;
141     }
142   }
143 
144   // Look for the length modifier.
145   if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146       // No more characters left?
147     H.HandleIncompleteSpecifier(Start, E - Start);
148     return true;
149   }
150 
151   // Detect spurious null characters, which are likely errors.
152   if (*I == '\0') {
153     H.HandleNullChar(I);
154     return true;
155   }
156 
157   // Finally, look for the conversion specifier.
158   const char *conversionPosition = I++;
159   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160   switch (*conversionPosition) {
161     default:
162       break;
163     case '%': k = ConversionSpecifier::PercentArg;   break;
164     case 'A': k = ConversionSpecifier::AArg; break;
165     case 'E': k = ConversionSpecifier::EArg; break;
166     case 'F': k = ConversionSpecifier::FArg; break;
167     case 'G': k = ConversionSpecifier::GArg; break;
168     case 'X': k = ConversionSpecifier::XArg; break;
169     case 'a': k = ConversionSpecifier::aArg; break;
170     case 'd': k = ConversionSpecifier::dArg; break;
171     case 'e': k = ConversionSpecifier::eArg; break;
172     case 'f': k = ConversionSpecifier::fArg; break;
173     case 'g': k = ConversionSpecifier::gArg; break;
174     case 'i': k = ConversionSpecifier::iArg; break;
175     case 'n': k = ConversionSpecifier::nArg; break;
176     case 'c': k = ConversionSpecifier::cArg; break;
177     case 'C': k = ConversionSpecifier::CArg; break;
178     case 'S': k = ConversionSpecifier::SArg; break;
179     case '[': k = ConversionSpecifier::ScanListArg; break;
180     case 'u': k = ConversionSpecifier::uArg; break;
181     case 'x': k = ConversionSpecifier::xArg; break;
182     case 'o': k = ConversionSpecifier::oArg; break;
183     case 's': k = ConversionSpecifier::sArg; break;
184     case 'p': k = ConversionSpecifier::pArg; break;
185     // Apple extensions
186       // Apple-specific
187     case 'D':
188       if (Target.getTriple().isOSDarwin())
189         k = ConversionSpecifier::DArg;
190       break;
191     case 'O':
192       if (Target.getTriple().isOSDarwin())
193         k = ConversionSpecifier::OArg;
194       break;
195     case 'U':
196       if (Target.getTriple().isOSDarwin())
197         k = ConversionSpecifier::UArg;
198       break;
199   }
200   ScanfConversionSpecifier CS(conversionPosition, k);
201   if (k == ScanfConversionSpecifier::ScanListArg) {
202     if (ParseScanList(H, CS, I, E))
203       return true;
204   }
205   FS.setConversionSpecifier(CS);
206   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
207       && !FS.usesPositionalArg())
208     FS.setArgIndex(argIndex++);
209 
210   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
211   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
212 
213   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
214     unsigned Len = I - Beg;
215     if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
216       CS.setEndScanList(Beg + Len);
217       FS.setConversionSpecifier(CS);
218     }
219     // Assume the conversion takes one argument.
220     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
221   }
222   return ScanfSpecifierResult(Start, FS);
223 }
224 
225 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
226   const ScanfConversionSpecifier &CS = getConversionSpecifier();
227 
228   if (!CS.consumesDataArgument())
229     return ArgType::Invalid();
230 
231   switch(CS.getKind()) {
232     // Signed int.
233     case ConversionSpecifier::dArg:
234     case ConversionSpecifier::DArg:
235     case ConversionSpecifier::iArg:
236       switch (LM.getKind()) {
237         case LengthModifier::None:
238           return ArgType::PtrTo(Ctx.IntTy);
239         case LengthModifier::AsChar:
240           return ArgType::PtrTo(ArgType::AnyCharTy);
241         case LengthModifier::AsShort:
242           return ArgType::PtrTo(Ctx.ShortTy);
243         case LengthModifier::AsLong:
244           return ArgType::PtrTo(Ctx.LongTy);
245         case LengthModifier::AsLongLong:
246         case LengthModifier::AsQuad:
247           return ArgType::PtrTo(Ctx.LongLongTy);
248         case LengthModifier::AsInt64:
249           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
250         case LengthModifier::AsIntMax:
251           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
252         case LengthModifier::AsSizeT:
253           return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
254         case LengthModifier::AsPtrDiff:
255           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
256         case LengthModifier::AsLongDouble:
257           // GNU extension.
258           return ArgType::PtrTo(Ctx.LongLongTy);
259         case LengthModifier::AsAllocate:
260         case LengthModifier::AsMAllocate:
261         case LengthModifier::AsInt32:
262         case LengthModifier::AsInt3264:
263         case LengthModifier::AsWide:
264         case LengthModifier::AsShortLong:
265           return ArgType::Invalid();
266       }
267       llvm_unreachable("Unsupported LengthModifier Type");
268 
269     // Unsigned int.
270     case ConversionSpecifier::oArg:
271     case ConversionSpecifier::OArg:
272     case ConversionSpecifier::uArg:
273     case ConversionSpecifier::UArg:
274     case ConversionSpecifier::xArg:
275     case ConversionSpecifier::XArg:
276       switch (LM.getKind()) {
277         case LengthModifier::None:
278           return ArgType::PtrTo(Ctx.UnsignedIntTy);
279         case LengthModifier::AsChar:
280           return ArgType::PtrTo(Ctx.UnsignedCharTy);
281         case LengthModifier::AsShort:
282           return ArgType::PtrTo(Ctx.UnsignedShortTy);
283         case LengthModifier::AsLong:
284           return ArgType::PtrTo(Ctx.UnsignedLongTy);
285         case LengthModifier::AsLongLong:
286         case LengthModifier::AsQuad:
287           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288         case LengthModifier::AsInt64:
289           return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290         case LengthModifier::AsIntMax:
291           return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292         case LengthModifier::AsSizeT:
293           return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294         case LengthModifier::AsPtrDiff:
295           return ArgType::PtrTo(
296               ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
297         case LengthModifier::AsLongDouble:
298           // GNU extension.
299           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300         case LengthModifier::AsAllocate:
301         case LengthModifier::AsMAllocate:
302         case LengthModifier::AsInt32:
303         case LengthModifier::AsInt3264:
304         case LengthModifier::AsWide:
305         case LengthModifier::AsShortLong:
306           return ArgType::Invalid();
307       }
308       llvm_unreachable("Unsupported LengthModifier Type");
309 
310     // Float.
311     case ConversionSpecifier::aArg:
312     case ConversionSpecifier::AArg:
313     case ConversionSpecifier::eArg:
314     case ConversionSpecifier::EArg:
315     case ConversionSpecifier::fArg:
316     case ConversionSpecifier::FArg:
317     case ConversionSpecifier::gArg:
318     case ConversionSpecifier::GArg:
319       switch (LM.getKind()) {
320         case LengthModifier::None:
321           return ArgType::PtrTo(Ctx.FloatTy);
322         case LengthModifier::AsLong:
323           return ArgType::PtrTo(Ctx.DoubleTy);
324         case LengthModifier::AsLongDouble:
325           return ArgType::PtrTo(Ctx.LongDoubleTy);
326         default:
327           return ArgType::Invalid();
328       }
329 
330     // Char, string and scanlist.
331     case ConversionSpecifier::cArg:
332     case ConversionSpecifier::sArg:
333     case ConversionSpecifier::ScanListArg:
334       switch (LM.getKind()) {
335         case LengthModifier::None:
336           return ArgType::PtrTo(ArgType::AnyCharTy);
337         case LengthModifier::AsLong:
338         case LengthModifier::AsWide:
339           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
340         case LengthModifier::AsAllocate:
341         case LengthModifier::AsMAllocate:
342           return ArgType::PtrTo(ArgType::CStrTy);
343         case LengthModifier::AsShort:
344           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
345             return ArgType::PtrTo(ArgType::AnyCharTy);
346           LLVM_FALLTHROUGH;
347         default:
348           return ArgType::Invalid();
349       }
350     case ConversionSpecifier::CArg:
351     case ConversionSpecifier::SArg:
352       // FIXME: Mac OS X specific?
353       switch (LM.getKind()) {
354         case LengthModifier::None:
355         case LengthModifier::AsWide:
356           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
357         case LengthModifier::AsAllocate:
358         case LengthModifier::AsMAllocate:
359           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
360         case LengthModifier::AsShort:
361           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
362             return ArgType::PtrTo(ArgType::AnyCharTy);
363           LLVM_FALLTHROUGH;
364         default:
365           return ArgType::Invalid();
366       }
367 
368     // Pointer.
369     case ConversionSpecifier::pArg:
370       return ArgType::PtrTo(ArgType::CPointerTy);
371 
372     // Write-back.
373     case ConversionSpecifier::nArg:
374       switch (LM.getKind()) {
375         case LengthModifier::None:
376           return ArgType::PtrTo(Ctx.IntTy);
377         case LengthModifier::AsChar:
378           return ArgType::PtrTo(Ctx.SignedCharTy);
379         case LengthModifier::AsShort:
380           return ArgType::PtrTo(Ctx.ShortTy);
381         case LengthModifier::AsLong:
382           return ArgType::PtrTo(Ctx.LongTy);
383         case LengthModifier::AsLongLong:
384         case LengthModifier::AsQuad:
385           return ArgType::PtrTo(Ctx.LongLongTy);
386         case LengthModifier::AsInt64:
387           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
388         case LengthModifier::AsIntMax:
389           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
390         case LengthModifier::AsSizeT:
391           return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
392         case LengthModifier::AsPtrDiff:
393           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
394         case LengthModifier::AsLongDouble:
395           return ArgType(); // FIXME: Is this a known extension?
396         case LengthModifier::AsAllocate:
397         case LengthModifier::AsMAllocate:
398         case LengthModifier::AsInt32:
399         case LengthModifier::AsInt3264:
400         case LengthModifier::AsWide:
401         case LengthModifier::AsShortLong:
402           return ArgType::Invalid();
403         }
404 
405     default:
406       break;
407   }
408 
409   return ArgType();
410 }
411 
412 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
413                              const LangOptions &LangOpt,
414                              ASTContext &Ctx) {
415 
416   // %n is different from other conversion specifiers; don't try to fix it.
417   if (CS.getKind() == ConversionSpecifier::nArg)
418     return false;
419 
420   if (!QT->isPointerType())
421     return false;
422 
423   QualType PT = QT->getPointeeType();
424 
425   // If it's an enum, get its underlying type.
426   if (const EnumType *ETy = PT->getAs<EnumType>()) {
427     // Don't try to fix incomplete enums.
428     if (!ETy->getDecl()->isComplete())
429       return false;
430     PT = ETy->getDecl()->getIntegerType();
431   }
432 
433   const BuiltinType *BT = PT->getAs<BuiltinType>();
434   if (!BT)
435     return false;
436 
437   // Pointer to a character.
438   if (PT->isAnyCharacterType()) {
439     CS.setKind(ConversionSpecifier::sArg);
440     if (PT->isWideCharType())
441       LM.setKind(LengthModifier::AsWideChar);
442     else
443       LM.setKind(LengthModifier::None);
444 
445     // If we know the target array length, we can use it as a field width.
446     if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
447       if (CAT->getSizeModifier() == ArrayType::Normal)
448         FieldWidth = OptionalAmount(OptionalAmount::Constant,
449                                     CAT->getSize().getZExtValue() - 1,
450                                     "", 0, false);
451 
452     }
453     return true;
454   }
455 
456   // Figure out the length modifier.
457   switch (BT->getKind()) {
458     // no modifier
459     case BuiltinType::UInt:
460     case BuiltinType::Int:
461     case BuiltinType::Float:
462       LM.setKind(LengthModifier::None);
463       break;
464 
465     // hh
466     case BuiltinType::Char_U:
467     case BuiltinType::UChar:
468     case BuiltinType::Char_S:
469     case BuiltinType::SChar:
470       LM.setKind(LengthModifier::AsChar);
471       break;
472 
473     // h
474     case BuiltinType::Short:
475     case BuiltinType::UShort:
476       LM.setKind(LengthModifier::AsShort);
477       break;
478 
479     // l
480     case BuiltinType::Long:
481     case BuiltinType::ULong:
482     case BuiltinType::Double:
483       LM.setKind(LengthModifier::AsLong);
484       break;
485 
486     // ll
487     case BuiltinType::LongLong:
488     case BuiltinType::ULongLong:
489       LM.setKind(LengthModifier::AsLongLong);
490       break;
491 
492     // L
493     case BuiltinType::LongDouble:
494       LM.setKind(LengthModifier::AsLongDouble);
495       break;
496 
497     // Don't know.
498     default:
499       return false;
500   }
501 
502   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
503   if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
504     namedTypeToLengthModifier(PT, LM);
505 
506   // If fixing the length modifier was enough, we are done.
507   if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508     const analyze_scanf::ArgType &AT = getArgType(Ctx);
509     if (AT.isValid() && AT.matchesType(Ctx, QT))
510       return true;
511   }
512 
513   // Figure out the conversion specifier.
514   if (PT->isRealFloatingType())
515     CS.setKind(ConversionSpecifier::fArg);
516   else if (PT->isSignedIntegerType())
517     CS.setKind(ConversionSpecifier::dArg);
518   else if (PT->isUnsignedIntegerType())
519     CS.setKind(ConversionSpecifier::uArg);
520   else
521     llvm_unreachable("Unexpected type");
522 
523   return true;
524 }
525 
526 void ScanfSpecifier::toString(raw_ostream &os) const {
527   os << "%";
528 
529   if (usesPositionalArg())
530     os << getPositionalArgIndex() << "$";
531   if (SuppressAssignment)
532     os << "*";
533 
534   FieldWidth.toString(os);
535   os << LM.toString();
536   os << CS.toString();
537 }
538 
539 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
540                                                     const char *I,
541                                                     const char *E,
542                                                     const LangOptions &LO,
543                                                     const TargetInfo &Target) {
544 
545   unsigned argIndex = 0;
546 
547   // Keep looking for a format specifier until we have exhausted the string.
548   while (I != E) {
549     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
550                                                           LO, Target);
551     // Did a fail-stop error of any kind occur when parsing the specifier?
552     // If so, don't do any more processing.
553     if (FSR.shouldStop())
554       return true;
555       // Did we exhaust the string or encounter an error that
556       // we can recover from?
557     if (!FSR.hasValue())
558       continue;
559       // We have a format specifier.  Pass it to the callback.
560     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561                                 I - FSR.getStart())) {
562       return true;
563     }
564   }
565   assert(I == E && "Format string not exhausted");
566   return false;
567 }
568