xref: /freebsd/contrib/llvm-project/clang/lib/AST/ScanfFormatString.cpp (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/FormatString.h"
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/TargetInfo.h"
17 
18 using clang::analyze_format_string::ArgType;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_scanf::ScanfConversionSpecifier;
24 using clang::analyze_scanf::ScanfSpecifier;
25 using clang::UpdateOnReturn;
26 using namespace clang;
27 
28 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29         ScanfSpecifierResult;
30 
31 static bool ParseScanList(FormatStringHandler &H,
32                           ScanfConversionSpecifier &CS,
33                           const char *&Beg, const char *E) {
34   const char *I = Beg;
35   const char *start = I - 1;
36   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37 
38   // No more characters?
39   if (I == E) {
40     H.HandleIncompleteScanList(start, I);
41     return true;
42   }
43 
44   // Special case: ']' is the first character.
45   if (*I == ']') {
46     if (++I == E) {
47       H.HandleIncompleteScanList(start, I - 1);
48       return true;
49     }
50   }
51 
52   // Special case: "^]" are the first characters.
53   if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54     I += 2;
55     if (I == E) {
56       H.HandleIncompleteScanList(start, I - 1);
57       return true;
58     }
59   }
60 
61   // Look for a ']' character which denotes the end of the scan list.
62   while (*I != ']') {
63     if (++I == E) {
64       H.HandleIncompleteScanList(start, I - 1);
65       return true;
66     }
67   }
68 
69   CS.setEndScanList(I);
70   return false;
71 }
72 
73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74 // We can possibly refactor.
75 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
76                                                 const char *&Beg,
77                                                 const char *E,
78                                                 unsigned &argIndex,
79                                                 const LangOptions &LO,
80                                                 const TargetInfo &Target) {
81   using namespace clang::analyze_format_string;
82   using namespace clang::analyze_scanf;
83   const char *I = Beg;
84   const char *Start = nullptr;
85   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86 
87     // Look for a '%' character that indicates the start of a format specifier.
88   for ( ; I != E ; ++I) {
89     char c = *I;
90     if (c == '\0') {
91         // Detect spurious null characters, which are likely errors.
92       H.HandleNullChar(I);
93       return true;
94     }
95     if (c == '%') {
96       Start = I++;  // Record the start of the format specifier.
97       break;
98     }
99   }
100 
101     // No format specifier found?
102   if (!Start)
103     return false;
104 
105   if (I == E) {
106       // No more characters left?
107     H.HandleIncompleteSpecifier(Start, E - Start);
108     return true;
109   }
110 
111   ScanfSpecifier FS;
112   if (ParseArgPosition(H, FS, Start, I, E))
113     return true;
114 
115   if (I == E) {
116       // No more characters left?
117     H.HandleIncompleteSpecifier(Start, E - Start);
118     return true;
119   }
120 
121   // Look for '*' flag if it is present.
122   if (*I == '*') {
123     FS.setSuppressAssignment(I);
124     if (++I == E) {
125       H.HandleIncompleteSpecifier(Start, E - Start);
126       return true;
127     }
128   }
129 
130   // Look for the field width (if any).  Unlike printf, this is either
131   // a fixed integer or isn't present.
132   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
133   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
134     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135     FS.setFieldWidth(Amt);
136 
137     if (I == E) {
138       // No more characters left?
139       H.HandleIncompleteSpecifier(Start, E - Start);
140       return true;
141     }
142   }
143 
144   // Look for the length modifier.
145   if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146       // No more characters left?
147     H.HandleIncompleteSpecifier(Start, E - Start);
148     return true;
149   }
150 
151   // Detect spurious null characters, which are likely errors.
152   if (*I == '\0') {
153     H.HandleNullChar(I);
154     return true;
155   }
156 
157   // Finally, look for the conversion specifier.
158   const char *conversionPosition = I++;
159   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160   switch (*conversionPosition) {
161     default:
162       break;
163     case '%': k = ConversionSpecifier::PercentArg;   break;
164     case 'b': k = ConversionSpecifier::bArg; break;
165     case 'A': k = ConversionSpecifier::AArg; break;
166     case 'E': k = ConversionSpecifier::EArg; break;
167     case 'F': k = ConversionSpecifier::FArg; break;
168     case 'G': k = ConversionSpecifier::GArg; break;
169     case 'X': k = ConversionSpecifier::XArg; break;
170     case 'a': k = ConversionSpecifier::aArg; break;
171     case 'd': k = ConversionSpecifier::dArg; break;
172     case 'e': k = ConversionSpecifier::eArg; break;
173     case 'f': k = ConversionSpecifier::fArg; break;
174     case 'g': k = ConversionSpecifier::gArg; break;
175     case 'i': k = ConversionSpecifier::iArg; break;
176     case 'n': k = ConversionSpecifier::nArg; break;
177     case 'c': k = ConversionSpecifier::cArg; break;
178     case 'C': k = ConversionSpecifier::CArg; break;
179     case 'S': k = ConversionSpecifier::SArg; break;
180     case '[': k = ConversionSpecifier::ScanListArg; break;
181     case 'u': k = ConversionSpecifier::uArg; break;
182     case 'x': k = ConversionSpecifier::xArg; break;
183     case 'o': k = ConversionSpecifier::oArg; break;
184     case 's': k = ConversionSpecifier::sArg; break;
185     case 'p': k = ConversionSpecifier::pArg; break;
186     // Apple extensions
187       // Apple-specific
188     case 'D':
189       if (Target.getTriple().isOSDarwin())
190         k = ConversionSpecifier::DArg;
191       break;
192     case 'O':
193       if (Target.getTriple().isOSDarwin())
194         k = ConversionSpecifier::OArg;
195       break;
196     case 'U':
197       if (Target.getTriple().isOSDarwin())
198         k = ConversionSpecifier::UArg;
199       break;
200   }
201   ScanfConversionSpecifier CS(conversionPosition, k);
202   if (k == ScanfConversionSpecifier::ScanListArg) {
203     if (ParseScanList(H, CS, I, E))
204       return true;
205   }
206   FS.setConversionSpecifier(CS);
207   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208       && !FS.usesPositionalArg())
209     FS.setArgIndex(argIndex++);
210 
211   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
212   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213 
214   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215     unsigned Len = I - Beg;
216     if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217       CS.setEndScanList(Beg + Len);
218       FS.setConversionSpecifier(CS);
219     }
220     // Assume the conversion takes one argument.
221     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222   }
223   return ScanfSpecifierResult(Start, FS);
224 }
225 
226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227   const ScanfConversionSpecifier &CS = getConversionSpecifier();
228 
229   if (!CS.consumesDataArgument())
230     return ArgType::Invalid();
231 
232   switch(CS.getKind()) {
233     // Signed int.
234     case ConversionSpecifier::dArg:
235     case ConversionSpecifier::DArg:
236     case ConversionSpecifier::iArg:
237       switch (LM.getKind()) {
238         case LengthModifier::None:
239           return ArgType::PtrTo(Ctx.IntTy);
240         case LengthModifier::AsChar:
241           return ArgType::PtrTo(ArgType::AnyCharTy);
242         case LengthModifier::AsShort:
243           return ArgType::PtrTo(Ctx.ShortTy);
244         case LengthModifier::AsLong:
245           return ArgType::PtrTo(Ctx.LongTy);
246         case LengthModifier::AsLongLong:
247         case LengthModifier::AsQuad:
248           return ArgType::PtrTo(Ctx.LongLongTy);
249         case LengthModifier::AsInt64:
250           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
251         case LengthModifier::AsIntMax:
252           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
253         case LengthModifier::AsSizeT:
254           return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
255         case LengthModifier::AsPtrDiff:
256           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
257         case LengthModifier::AsLongDouble:
258           // GNU extension.
259           return ArgType::PtrTo(Ctx.LongLongTy);
260         case LengthModifier::AsAllocate:
261         case LengthModifier::AsMAllocate:
262         case LengthModifier::AsInt32:
263         case LengthModifier::AsInt3264:
264         case LengthModifier::AsWide:
265         case LengthModifier::AsShortLong:
266           return ArgType::Invalid();
267       }
268       llvm_unreachable("Unsupported LengthModifier Type");
269 
270     // Unsigned int.
271     case ConversionSpecifier::bArg:
272     case ConversionSpecifier::oArg:
273     case ConversionSpecifier::OArg:
274     case ConversionSpecifier::uArg:
275     case ConversionSpecifier::UArg:
276     case ConversionSpecifier::xArg:
277     case ConversionSpecifier::XArg:
278       switch (LM.getKind()) {
279         case LengthModifier::None:
280           return ArgType::PtrTo(Ctx.UnsignedIntTy);
281         case LengthModifier::AsChar:
282           return ArgType::PtrTo(Ctx.UnsignedCharTy);
283         case LengthModifier::AsShort:
284           return ArgType::PtrTo(Ctx.UnsignedShortTy);
285         case LengthModifier::AsLong:
286           return ArgType::PtrTo(Ctx.UnsignedLongTy);
287         case LengthModifier::AsLongLong:
288         case LengthModifier::AsQuad:
289           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
290         case LengthModifier::AsInt64:
291           return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
292         case LengthModifier::AsIntMax:
293           return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
294         case LengthModifier::AsSizeT:
295           return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
296         case LengthModifier::AsPtrDiff:
297           return ArgType::PtrTo(
298               ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
299         case LengthModifier::AsLongDouble:
300           // GNU extension.
301           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
302         case LengthModifier::AsAllocate:
303         case LengthModifier::AsMAllocate:
304         case LengthModifier::AsInt32:
305         case LengthModifier::AsInt3264:
306         case LengthModifier::AsWide:
307         case LengthModifier::AsShortLong:
308           return ArgType::Invalid();
309       }
310       llvm_unreachable("Unsupported LengthModifier Type");
311 
312     // Float.
313     case ConversionSpecifier::aArg:
314     case ConversionSpecifier::AArg:
315     case ConversionSpecifier::eArg:
316     case ConversionSpecifier::EArg:
317     case ConversionSpecifier::fArg:
318     case ConversionSpecifier::FArg:
319     case ConversionSpecifier::gArg:
320     case ConversionSpecifier::GArg:
321       switch (LM.getKind()) {
322         case LengthModifier::None:
323           return ArgType::PtrTo(Ctx.FloatTy);
324         case LengthModifier::AsLong:
325           return ArgType::PtrTo(Ctx.DoubleTy);
326         case LengthModifier::AsLongDouble:
327           return ArgType::PtrTo(Ctx.LongDoubleTy);
328         default:
329           return ArgType::Invalid();
330       }
331 
332     // Char, string and scanlist.
333     case ConversionSpecifier::cArg:
334     case ConversionSpecifier::sArg:
335     case ConversionSpecifier::ScanListArg:
336       switch (LM.getKind()) {
337         case LengthModifier::None:
338           return ArgType::PtrTo(ArgType::AnyCharTy);
339         case LengthModifier::AsLong:
340         case LengthModifier::AsWide:
341           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
342         case LengthModifier::AsAllocate:
343         case LengthModifier::AsMAllocate:
344           return ArgType::PtrTo(ArgType::CStrTy);
345         case LengthModifier::AsShort:
346           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
347             return ArgType::PtrTo(ArgType::AnyCharTy);
348           [[fallthrough]];
349         default:
350           return ArgType::Invalid();
351       }
352     case ConversionSpecifier::CArg:
353     case ConversionSpecifier::SArg:
354       // FIXME: Mac OS X specific?
355       switch (LM.getKind()) {
356         case LengthModifier::None:
357         case LengthModifier::AsWide:
358           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
359         case LengthModifier::AsAllocate:
360         case LengthModifier::AsMAllocate:
361           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
362         case LengthModifier::AsShort:
363           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
364             return ArgType::PtrTo(ArgType::AnyCharTy);
365           [[fallthrough]];
366         default:
367           return ArgType::Invalid();
368       }
369 
370     // Pointer.
371     case ConversionSpecifier::pArg:
372       return ArgType::PtrTo(ArgType::CPointerTy);
373 
374     // Write-back.
375     case ConversionSpecifier::nArg:
376       switch (LM.getKind()) {
377         case LengthModifier::None:
378           return ArgType::PtrTo(Ctx.IntTy);
379         case LengthModifier::AsChar:
380           return ArgType::PtrTo(Ctx.SignedCharTy);
381         case LengthModifier::AsShort:
382           return ArgType::PtrTo(Ctx.ShortTy);
383         case LengthModifier::AsLong:
384           return ArgType::PtrTo(Ctx.LongTy);
385         case LengthModifier::AsLongLong:
386         case LengthModifier::AsQuad:
387           return ArgType::PtrTo(Ctx.LongLongTy);
388         case LengthModifier::AsInt64:
389           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
390         case LengthModifier::AsIntMax:
391           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
392         case LengthModifier::AsSizeT:
393           return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
394         case LengthModifier::AsPtrDiff:
395           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
396         case LengthModifier::AsLongDouble:
397           return ArgType(); // FIXME: Is this a known extension?
398         case LengthModifier::AsAllocate:
399         case LengthModifier::AsMAllocate:
400         case LengthModifier::AsInt32:
401         case LengthModifier::AsInt3264:
402         case LengthModifier::AsWide:
403         case LengthModifier::AsShortLong:
404           return ArgType::Invalid();
405         }
406 
407     default:
408       break;
409   }
410 
411   return ArgType();
412 }
413 
414 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
415                              const LangOptions &LangOpt,
416                              ASTContext &Ctx) {
417 
418   // %n is different from other conversion specifiers; don't try to fix it.
419   if (CS.getKind() == ConversionSpecifier::nArg)
420     return false;
421 
422   if (!QT->isPointerType())
423     return false;
424 
425   QualType PT = QT->getPointeeType();
426 
427   // If it's an enum, get its underlying type.
428   if (const EnumType *ETy = PT->getAs<EnumType>()) {
429     // Don't try to fix incomplete enums.
430     if (!ETy->getDecl()->isComplete())
431       return false;
432     PT = ETy->getDecl()->getIntegerType();
433   }
434 
435   const BuiltinType *BT = PT->getAs<BuiltinType>();
436   if (!BT)
437     return false;
438 
439   // Pointer to a character.
440   if (PT->isAnyCharacterType()) {
441     CS.setKind(ConversionSpecifier::sArg);
442     if (PT->isWideCharType())
443       LM.setKind(LengthModifier::AsWideChar);
444     else
445       LM.setKind(LengthModifier::None);
446 
447     // If we know the target array length, we can use it as a field width.
448     if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
449       if (CAT->getSizeModifier() == ArrayType::Normal)
450         FieldWidth = OptionalAmount(OptionalAmount::Constant,
451                                     CAT->getSize().getZExtValue() - 1,
452                                     "", 0, false);
453 
454     }
455     return true;
456   }
457 
458   // Figure out the length modifier.
459   switch (BT->getKind()) {
460     // no modifier
461     case BuiltinType::UInt:
462     case BuiltinType::Int:
463     case BuiltinType::Float:
464       LM.setKind(LengthModifier::None);
465       break;
466 
467     // hh
468     case BuiltinType::Char_U:
469     case BuiltinType::UChar:
470     case BuiltinType::Char_S:
471     case BuiltinType::SChar:
472       LM.setKind(LengthModifier::AsChar);
473       break;
474 
475     // h
476     case BuiltinType::Short:
477     case BuiltinType::UShort:
478       LM.setKind(LengthModifier::AsShort);
479       break;
480 
481     // l
482     case BuiltinType::Long:
483     case BuiltinType::ULong:
484     case BuiltinType::Double:
485       LM.setKind(LengthModifier::AsLong);
486       break;
487 
488     // ll
489     case BuiltinType::LongLong:
490     case BuiltinType::ULongLong:
491       LM.setKind(LengthModifier::AsLongLong);
492       break;
493 
494     // L
495     case BuiltinType::LongDouble:
496       LM.setKind(LengthModifier::AsLongDouble);
497       break;
498 
499     // Don't know.
500     default:
501       return false;
502   }
503 
504   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
505   if (LangOpt.C99 || LangOpt.CPlusPlus11)
506     namedTypeToLengthModifier(PT, LM);
507 
508   // If fixing the length modifier was enough, we are done.
509   if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
510     const analyze_scanf::ArgType &AT = getArgType(Ctx);
511     if (AT.isValid() && AT.matchesType(Ctx, QT))
512       return true;
513   }
514 
515   // Figure out the conversion specifier.
516   if (PT->isRealFloatingType())
517     CS.setKind(ConversionSpecifier::fArg);
518   else if (PT->isSignedIntegerType())
519     CS.setKind(ConversionSpecifier::dArg);
520   else if (PT->isUnsignedIntegerType())
521     CS.setKind(ConversionSpecifier::uArg);
522   else
523     llvm_unreachable("Unexpected type");
524 
525   return true;
526 }
527 
528 void ScanfSpecifier::toString(raw_ostream &os) const {
529   os << "%";
530 
531   if (usesPositionalArg())
532     os << getPositionalArgIndex() << "$";
533   if (SuppressAssignment)
534     os << "*";
535 
536   FieldWidth.toString(os);
537   os << LM.toString();
538   os << CS.toString();
539 }
540 
541 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
542                                                     const char *I,
543                                                     const char *E,
544                                                     const LangOptions &LO,
545                                                     const TargetInfo &Target) {
546 
547   unsigned argIndex = 0;
548 
549   // Keep looking for a format specifier until we have exhausted the string.
550   while (I != E) {
551     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
552                                                           LO, Target);
553     // Did a fail-stop error of any kind occur when parsing the specifier?
554     // If so, don't do any more processing.
555     if (FSR.shouldStop())
556       return true;
557       // Did we exhaust the string or encounter an error that
558       // we can recover from?
559     if (!FSR.hasValue())
560       continue;
561       // We have a format specifier.  Pass it to the callback.
562     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
563                                 I - FSR.getStart())) {
564       return true;
565     }
566   }
567   assert(I == E && "Format string not exhausted");
568   return false;
569 }
570