xref: /freebsd/contrib/llvm-project/clang/lib/Lex/PPExpressions.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Preprocessor::EvaluateDirectiveExpression method,
10 // which parses and evaluates integer constant expressions for #if directives.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // FIXME: implement testing for #assert's.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "clang/Basic/IdentifierTable.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TargetInfo.h"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Lex/CodeCompletionHandler.h"
24 #include "clang/Lex/LexDiagnostic.h"
25 #include "clang/Lex/LiteralSupport.h"
26 #include "clang/Lex/MacroInfo.h"
27 #include "clang/Lex/PPCallbacks.h"
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Lex/PreprocessorOptions.h"
30 #include "clang/Lex/Token.h"
31 #include "llvm/ADT/APSInt.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/SaveAndRestore.h"
37 #include <cassert>
38 
39 using namespace clang;
40 
41 namespace {
42 
43 /// PPValue - Represents the value of a subexpression of a preprocessor
44 /// conditional and the source range covered by it.
45 class PPValue {
46   SourceRange Range;
47   IdentifierInfo *II = nullptr;
48 
49 public:
50   llvm::APSInt Val;
51 
52   // Default ctor - Construct an 'invalid' PPValue.
PPValue(unsigned BitWidth)53   PPValue(unsigned BitWidth) : Val(BitWidth) {}
54 
55   // If this value was produced by directly evaluating an identifier, produce
56   // that identifier.
getIdentifier() const57   IdentifierInfo *getIdentifier() const { return II; }
setIdentifier(IdentifierInfo * II)58   void setIdentifier(IdentifierInfo *II) { this->II = II; }
59 
getBitWidth() const60   unsigned getBitWidth() const { return Val.getBitWidth(); }
isUnsigned() const61   bool isUnsigned() const { return Val.isUnsigned(); }
62 
getRange() const63   SourceRange getRange() const { return Range; }
64 
setRange(SourceLocation L)65   void setRange(SourceLocation L) { Range.setBegin(L); Range.setEnd(L); }
setRange(SourceLocation B,SourceLocation E)66   void setRange(SourceLocation B, SourceLocation E) {
67     Range.setBegin(B); Range.setEnd(E);
68   }
setBegin(SourceLocation L)69   void setBegin(SourceLocation L) { Range.setBegin(L); }
setEnd(SourceLocation L)70   void setEnd(SourceLocation L) { Range.setEnd(L); }
71 };
72 
73 } // end anonymous namespace
74 
75 static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
76                                      Token &PeekTok, bool ValueLive,
77                                      bool &IncludedUndefinedIds,
78                                      Preprocessor &PP);
79 
80 /// DefinedTracker - This struct is used while parsing expressions to keep track
81 /// of whether !defined(X) has been seen.
82 ///
83 /// With this simple scheme, we handle the basic forms:
84 ///    !defined(X)   and !defined X
85 /// but we also trivially handle (silly) stuff like:
86 ///    !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)).
87 struct DefinedTracker {
88   /// Each time a Value is evaluated, it returns information about whether the
89   /// parsed value is of the form defined(X), !defined(X) or is something else.
90   enum TrackerState {
91     DefinedMacro,        // defined(X)
92     NotDefinedMacro,     // !defined(X)
93     Unknown              // Something else.
94   } State;
95   /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this
96   /// indicates the macro that was checked.
97   IdentifierInfo *TheMacro;
98   bool IncludedUndefinedIds = false;
99 };
100 
101 /// EvaluateDefined - Process a 'defined(sym)' expression.
EvaluateDefined(PPValue & Result,Token & PeekTok,DefinedTracker & DT,bool ValueLive,Preprocessor & PP)102 static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
103                             bool ValueLive, Preprocessor &PP) {
104   SourceLocation beginLoc(PeekTok.getLocation());
105   Result.setBegin(beginLoc);
106 
107   // Get the next token, don't expand it.
108   PP.LexUnexpandedNonComment(PeekTok);
109 
110   // Two options, it can either be a pp-identifier or a (.
111   SourceLocation LParenLoc;
112   if (PeekTok.is(tok::l_paren)) {
113     // Found a paren, remember we saw it and skip it.
114     LParenLoc = PeekTok.getLocation();
115     PP.LexUnexpandedNonComment(PeekTok);
116   }
117 
118   if (PeekTok.is(tok::code_completion)) {
119     if (PP.getCodeCompletionHandler())
120       PP.getCodeCompletionHandler()->CodeCompleteMacroName(false);
121     PP.setCodeCompletionReached();
122     PP.LexUnexpandedNonComment(PeekTok);
123   }
124 
125   // If we don't have a pp-identifier now, this is an error.
126   if (PP.CheckMacroName(PeekTok, MU_Other))
127     return true;
128 
129   // Otherwise, we got an identifier, is it defined to something?
130   IdentifierInfo *II = PeekTok.getIdentifierInfo();
131   MacroDefinition Macro = PP.getMacroDefinition(II);
132   Result.Val = !!Macro;
133   Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
134   DT.IncludedUndefinedIds = !Macro;
135 
136   PP.emitMacroExpansionWarnings(
137       PeekTok,
138       (II->getName() == "INFINITY" || II->getName() == "NAN") ? true : false);
139 
140   // If there is a macro, mark it used.
141   if (Result.Val != 0 && ValueLive)
142     PP.markMacroAsUsed(Macro.getMacroInfo());
143 
144   // Save macro token for callback.
145   Token macroToken(PeekTok);
146 
147   // If we are in parens, ensure we have a trailing ).
148   if (LParenLoc.isValid()) {
149     // Consume identifier.
150     Result.setEnd(PeekTok.getLocation());
151     PP.LexUnexpandedNonComment(PeekTok);
152 
153     if (PeekTok.isNot(tok::r_paren)) {
154       PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_after)
155           << "'defined'" << tok::r_paren;
156       PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
157       return true;
158     }
159     // Consume the ).
160     PP.LexNonComment(PeekTok);
161     Result.setEnd(PeekTok.getLocation());
162   } else {
163     // Consume identifier.
164     Result.setEnd(PeekTok.getLocation());
165     PP.LexNonComment(PeekTok);
166   }
167 
168   // [cpp.cond]p4:
169   //   Prior to evaluation, macro invocations in the list of preprocessing
170   //   tokens that will become the controlling constant expression are replaced
171   //   (except for those macro names modified by the 'defined' unary operator),
172   //   just as in normal text. If the token 'defined' is generated as a result
173   //   of this replacement process or use of the 'defined' unary operator does
174   //   not match one of the two specified forms prior to macro replacement, the
175   //   behavior is undefined.
176   // This isn't an idle threat, consider this program:
177   //   #define FOO
178   //   #define BAR defined(FOO)
179   //   #if BAR
180   //   ...
181   //   #else
182   //   ...
183   //   #endif
184   // clang and gcc will pick the #if branch while Visual Studio will take the
185   // #else branch.  Emit a warning about this undefined behavior.
186   if (beginLoc.isMacroID()) {
187     bool IsFunctionTypeMacro =
188         PP.getSourceManager()
189             .getSLocEntry(PP.getSourceManager().getFileID(beginLoc))
190             .getExpansion()
191             .isFunctionMacroExpansion();
192     // For object-type macros, it's easy to replace
193     //   #define FOO defined(BAR)
194     // with
195     //   #if defined(BAR)
196     //   #define FOO 1
197     //   #else
198     //   #define FOO 0
199     //   #endif
200     // and doing so makes sense since compilers handle this differently in
201     // practice (see example further up).  But for function-type macros,
202     // there is no good way to write
203     //   # define FOO(x) (defined(M_ ## x) && M_ ## x)
204     // in a different way, and compilers seem to agree on how to behave here.
205     // So warn by default on object-type macros, but only warn in -pedantic
206     // mode on function-type macros.
207     if (IsFunctionTypeMacro)
208       PP.Diag(beginLoc, diag::warn_defined_in_function_type_macro);
209     else
210       PP.Diag(beginLoc, diag::warn_defined_in_object_type_macro);
211   }
212 
213   // Invoke the 'defined' callback.
214   if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
215     Callbacks->Defined(macroToken, Macro,
216                        SourceRange(beginLoc, PeekTok.getLocation()));
217   }
218 
219   // Success, remember that we saw defined(X).
220   DT.State = DefinedTracker::DefinedMacro;
221   DT.TheMacro = II;
222   return false;
223 }
224 
225 /// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
226 /// return the computed value in Result.  Return true if there was an error
227 /// parsing.  This function also returns information about the form of the
228 /// expression in DT.  See above for information on what DT means.
229 ///
230 /// If ValueLive is false, then this value is being evaluated in a context where
231 /// the result is not used.  As such, avoid diagnostics that relate to
232 /// evaluation.
EvaluateValue(PPValue & Result,Token & PeekTok,DefinedTracker & DT,bool ValueLive,Preprocessor & PP)233 static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
234                           bool ValueLive, Preprocessor &PP) {
235   DT.State = DefinedTracker::Unknown;
236 
237   Result.setIdentifier(nullptr);
238 
239   if (PeekTok.is(tok::code_completion)) {
240     if (PP.getCodeCompletionHandler())
241       PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression();
242     PP.setCodeCompletionReached();
243     PP.LexNonComment(PeekTok);
244   }
245 
246   switch (PeekTok.getKind()) {
247   default:
248     // If this token's spelling is a pp-identifier, check to see if it is
249     // 'defined' or if it is a macro.  Note that we check here because many
250     // keywords are pp-identifiers, so we can't check the kind.
251     if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
252       // Handle "defined X" and "defined(X)".
253       if (II->isStr("defined"))
254         return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP);
255 
256       if (!II->isCPlusPlusOperatorKeyword()) {
257         // If this identifier isn't 'defined' or one of the special
258         // preprocessor keywords and it wasn't macro expanded, it turns
259         // into a simple 0
260         if (ValueLive) {
261           unsigned DiagID = II->getName() == "true"
262                                 ? diag::warn_pp_undef_true_identifier
263                                 : diag::warn_pp_undef_identifier;
264           PP.Diag(PeekTok, DiagID) << II;
265 
266           const DiagnosticsEngine &DiagEngine = PP.getDiagnostics();
267           // If 'Wundef' is enabled, do not emit 'undef-prefix' diagnostics.
268           if (DiagEngine.isIgnored(DiagID, PeekTok.getLocation())) {
269             const std::vector<std::string> UndefPrefixes =
270                 DiagEngine.getDiagnosticOptions().UndefPrefixes;
271             const StringRef IdentifierName = II->getName();
272             if (llvm::any_of(UndefPrefixes,
273                              [&IdentifierName](const std::string &Prefix) {
274                                return IdentifierName.starts_with(Prefix);
275                              }))
276               PP.Diag(PeekTok, diag::warn_pp_undef_prefix)
277                   << AddFlagValue{llvm::join(UndefPrefixes, ",")} << II;
278           }
279         }
280         Result.Val = 0;
281         Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
282         Result.setIdentifier(II);
283         Result.setRange(PeekTok.getLocation());
284         DT.IncludedUndefinedIds = true;
285         PP.LexNonComment(PeekTok);
286         return false;
287       }
288     }
289     PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr);
290     return true;
291   case tok::eod:
292   case tok::r_paren:
293     // If there is no expression, report and exit.
294     PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr);
295     return true;
296   case tok::numeric_constant: {
297     SmallString<64> IntegerBuffer;
298     bool NumberInvalid = false;
299     StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,
300                                               &NumberInvalid);
301     if (NumberInvalid)
302       return true; // a diagnostic was already reported
303 
304     NumericLiteralParser Literal(Spelling, PeekTok.getLocation(),
305                                  PP.getSourceManager(), PP.getLangOpts(),
306                                  PP.getTargetInfo(), PP.getDiagnostics());
307     if (Literal.hadError)
308       return true; // a diagnostic was already reported.
309 
310     if (Literal.isFloatingLiteral() || Literal.isImaginary) {
311       PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal);
312       return true;
313     }
314     assert(Literal.isIntegerLiteral() && "Unknown ppnumber");
315 
316     // Complain about, and drop, any ud-suffix.
317     if (Literal.hasUDSuffix())
318       PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*integer*/1;
319 
320     // 'long long' is a C99 or C++11 feature.
321     if (!PP.getLangOpts().C99 && Literal.isLongLong) {
322       if (PP.getLangOpts().CPlusPlus)
323         PP.Diag(PeekTok,
324              PP.getLangOpts().CPlusPlus11 ?
325              diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
326       else
327         PP.Diag(PeekTok, diag::ext_c99_longlong);
328     }
329 
330     // 'z/uz' literals are a C++23 feature.
331     if (Literal.isSizeT)
332       PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus
333                            ? PP.getLangOpts().CPlusPlus23
334                                  ? diag::warn_cxx20_compat_size_t_suffix
335                                  : diag::ext_cxx23_size_t_suffix
336                            : diag::err_cxx23_size_t_suffix);
337 
338     // 'wb/uwb' literals are a C23 feature.
339     // '__wb/__uwb' are a C++ extension.
340     if (Literal.isBitInt)
341       PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
342                        : PP.getLangOpts().C23
343                            ? diag::warn_c23_compat_bitint_suffix
344                            : diag::ext_c23_bitint_suffix);
345 
346     // Parse the integer literal into Result.
347     if (Literal.GetIntegerValue(Result.Val)) {
348       // Overflow parsing integer literal.
349       PP.Diag(PeekTok, diag::err_integer_literal_too_large) << /* Unsigned */ 1;
350       Result.Val.setIsUnsigned(true);
351     } else {
352       // Set the signedness of the result to match whether there was a U suffix
353       // or not.
354       Result.Val.setIsUnsigned(Literal.isUnsigned);
355 
356       // Detect overflow based on whether the value is signed.  If signed
357       // and if the value is too large, emit a warning "integer constant is so
358       // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t
359       // is 64-bits.
360       if (!Literal.isUnsigned && Result.Val.isNegative()) {
361         // Octal, hexadecimal, and binary literals are implicitly unsigned if
362         // the value does not fit into a signed integer type.
363         if (ValueLive && Literal.getRadix() == 10)
364           PP.Diag(PeekTok, diag::ext_integer_literal_too_large_for_signed);
365         Result.Val.setIsUnsigned(true);
366       }
367     }
368 
369     // Consume the token.
370     Result.setRange(PeekTok.getLocation());
371     PP.LexNonComment(PeekTok);
372     return false;
373   }
374   case tok::char_constant:          // 'x'
375   case tok::wide_char_constant:     // L'x'
376   case tok::utf8_char_constant:     // u8'x'
377   case tok::utf16_char_constant:    // u'x'
378   case tok::utf32_char_constant: {  // U'x'
379     // Complain about, and drop, any ud-suffix.
380     if (PeekTok.hasUDSuffix())
381       PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0;
382 
383     SmallString<32> CharBuffer;
384     bool CharInvalid = false;
385     StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
386     if (CharInvalid)
387       return true;
388 
389     CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
390                               PeekTok.getLocation(), PP, PeekTok.getKind());
391     if (Literal.hadError())
392       return true;  // A diagnostic was already emitted.
393 
394     // Character literals are always int or wchar_t, expand to intmax_t.
395     const TargetInfo &TI = PP.getTargetInfo();
396     unsigned NumBits;
397     if (Literal.isMultiChar())
398       NumBits = TI.getIntWidth();
399     else if (Literal.isWide())
400       NumBits = TI.getWCharWidth();
401     else if (Literal.isUTF16())
402       NumBits = TI.getChar16Width();
403     else if (Literal.isUTF32())
404       NumBits = TI.getChar32Width();
405     else // char or char8_t
406       NumBits = TI.getCharWidth();
407 
408     // Set the width.
409     llvm::APSInt Val(NumBits);
410     // Set the value.
411     Val = Literal.getValue();
412     // Set the signedness. UTF-16 and UTF-32 are always unsigned
413     // UTF-8 is unsigned if -fchar8_t is specified.
414     if (Literal.isWide())
415       Val.setIsUnsigned(!TargetInfo::isTypeSigned(TI.getWCharType()));
416     else if (Literal.isUTF16() || Literal.isUTF32())
417       Val.setIsUnsigned(true);
418     else if (Literal.isUTF8()) {
419       if (PP.getLangOpts().CPlusPlus)
420         Val.setIsUnsigned(
421             PP.getLangOpts().Char8 ? true : !PP.getLangOpts().CharIsSigned);
422       else
423         Val.setIsUnsigned(true);
424     } else
425       Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned);
426 
427     if (Result.Val.getBitWidth() > Val.getBitWidth()) {
428       Result.Val = Val.extend(Result.Val.getBitWidth());
429     } else {
430       assert(Result.Val.getBitWidth() == Val.getBitWidth() &&
431              "intmax_t smaller than char/wchar_t?");
432       Result.Val = Val;
433     }
434 
435     // Consume the token.
436     Result.setRange(PeekTok.getLocation());
437     PP.LexNonComment(PeekTok);
438     return false;
439   }
440   case tok::l_paren: {
441     SourceLocation Start = PeekTok.getLocation();
442     PP.LexNonComment(PeekTok);  // Eat the (.
443     // Parse the value and if there are any binary operators involved, parse
444     // them.
445     if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
446 
447     // If this is a silly value like (X), which doesn't need parens, check for
448     // !(defined X).
449     if (PeekTok.is(tok::r_paren)) {
450       // Just use DT unmodified as our result.
451     } else {
452       // Otherwise, we have something like (x+y), and we consumed '(x'.
453       if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive,
454                                    DT.IncludedUndefinedIds, PP))
455         return true;
456 
457       if (PeekTok.isNot(tok::r_paren)) {
458         PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_rparen)
459           << Result.getRange();
460         PP.Diag(Start, diag::note_matching) << tok::l_paren;
461         return true;
462       }
463       DT.State = DefinedTracker::Unknown;
464     }
465     Result.setRange(Start, PeekTok.getLocation());
466     Result.setIdentifier(nullptr);
467     PP.LexNonComment(PeekTok);  // Eat the ).
468     return false;
469   }
470   case tok::plus: {
471     SourceLocation Start = PeekTok.getLocation();
472     // Unary plus doesn't modify the value.
473     PP.LexNonComment(PeekTok);
474     if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
475     Result.setBegin(Start);
476     Result.setIdentifier(nullptr);
477     return false;
478   }
479   case tok::minus: {
480     SourceLocation Loc = PeekTok.getLocation();
481     PP.LexNonComment(PeekTok);
482     if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
483     Result.setBegin(Loc);
484     Result.setIdentifier(nullptr);
485 
486     // C99 6.5.3.3p3: The sign of the result matches the sign of the operand.
487     Result.Val = -Result.Val;
488 
489     // -MININT is the only thing that overflows.  Unsigned never overflows.
490     bool Overflow = !Result.isUnsigned() && Result.Val.isMinSignedValue();
491 
492     // If this operator is live and overflowed, report the issue.
493     if (Overflow && ValueLive)
494       PP.Diag(Loc, diag::warn_pp_expr_overflow) << Result.getRange();
495 
496     DT.State = DefinedTracker::Unknown;
497     return false;
498   }
499 
500   case tok::tilde: {
501     SourceLocation Start = PeekTok.getLocation();
502     PP.LexNonComment(PeekTok);
503     if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
504     Result.setBegin(Start);
505     Result.setIdentifier(nullptr);
506 
507     // C99 6.5.3.3p4: The sign of the result matches the sign of the operand.
508     Result.Val = ~Result.Val;
509     DT.State = DefinedTracker::Unknown;
510     return false;
511   }
512 
513   case tok::exclaim: {
514     SourceLocation Start = PeekTok.getLocation();
515     PP.LexNonComment(PeekTok);
516     if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
517     Result.setBegin(Start);
518     Result.Val = !Result.Val;
519     // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed.
520     Result.Val.setIsUnsigned(false);
521     Result.setIdentifier(nullptr);
522 
523     if (DT.State == DefinedTracker::DefinedMacro)
524       DT.State = DefinedTracker::NotDefinedMacro;
525     else if (DT.State == DefinedTracker::NotDefinedMacro)
526       DT.State = DefinedTracker::DefinedMacro;
527     return false;
528   }
529   case tok::kw_true:
530   case tok::kw_false:
531     Result.Val = PeekTok.getKind() == tok::kw_true;
532     Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
533     Result.setIdentifier(PeekTok.getIdentifierInfo());
534     Result.setRange(PeekTok.getLocation());
535     PP.LexNonComment(PeekTok);
536     return false;
537 
538   // FIXME: Handle #assert
539   }
540 }
541 
542 /// getPrecedence - Return the precedence of the specified binary operator
543 /// token.  This returns:
544 ///   ~0 - Invalid token.
545 ///   14 -> 3 - various operators.
546 ///    0 - 'eod' or ')'
getPrecedence(tok::TokenKind Kind)547 static unsigned getPrecedence(tok::TokenKind Kind) {
548   switch (Kind) {
549   default: return ~0U;
550   case tok::percent:
551   case tok::slash:
552   case tok::star:                 return 14;
553   case tok::plus:
554   case tok::minus:                return 13;
555   case tok::lessless:
556   case tok::greatergreater:       return 12;
557   case tok::lessequal:
558   case tok::less:
559   case tok::greaterequal:
560   case tok::greater:              return 11;
561   case tok::exclaimequal:
562   case tok::equalequal:           return 10;
563   case tok::amp:                  return 9;
564   case tok::caret:                return 8;
565   case tok::pipe:                 return 7;
566   case tok::ampamp:               return 6;
567   case tok::pipepipe:             return 5;
568   case tok::question:             return 4;
569   case tok::comma:                return 3;
570   case tok::colon:                return 2;
571   case tok::r_paren:              return 0;// Lowest priority, end of expr.
572   case tok::eod:                  return 0;// Lowest priority, end of directive.
573   }
574 }
575 
diagnoseUnexpectedOperator(Preprocessor & PP,PPValue & LHS,Token & Tok)576 static void diagnoseUnexpectedOperator(Preprocessor &PP, PPValue &LHS,
577                                        Token &Tok) {
578   if (Tok.is(tok::l_paren) && LHS.getIdentifier())
579     PP.Diag(LHS.getRange().getBegin(), diag::err_pp_expr_bad_token_lparen)
580         << LHS.getIdentifier();
581   else
582     PP.Diag(Tok.getLocation(), diag::err_pp_expr_bad_token_binop)
583         << LHS.getRange();
584 }
585 
586 /// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is
587 /// PeekTok, and whose precedence is PeekPrec.  This returns the result in LHS.
588 ///
589 /// If ValueLive is false, then this value is being evaluated in a context where
590 /// the result is not used.  As such, avoid diagnostics that relate to
591 /// evaluation, such as division by zero warnings.
EvaluateDirectiveSubExpr(PPValue & LHS,unsigned MinPrec,Token & PeekTok,bool ValueLive,bool & IncludedUndefinedIds,Preprocessor & PP)592 static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
593                                      Token &PeekTok, bool ValueLive,
594                                      bool &IncludedUndefinedIds,
595                                      Preprocessor &PP) {
596   if (PP.getPreprocessorOpts().SingleFileParseMode && IncludedUndefinedIds) {
597     // The single-file parse mode behavior kicks in as soon as single identifier
598     // is undefined. If we've already seen one, there's no point in continuing
599     // with the rest of the expression. Besides saving work, this also prevents
600     // calling undefined function-like macros.
601     PP.DiscardUntilEndOfDirective(PeekTok);
602     return true;
603   }
604 
605   unsigned PeekPrec = getPrecedence(PeekTok.getKind());
606   // If this token isn't valid, report the error.
607   if (PeekPrec == ~0U) {
608     diagnoseUnexpectedOperator(PP, LHS, PeekTok);
609     return true;
610   }
611 
612   while (true) {
613     // If this token has a lower precedence than we are allowed to parse, return
614     // it so that higher levels of the recursion can parse it.
615     if (PeekPrec < MinPrec)
616       return false;
617 
618     tok::TokenKind Operator = PeekTok.getKind();
619 
620     // If this is a short-circuiting operator, see if the RHS of the operator is
621     // dead.  Note that this cannot just clobber ValueLive.  Consider
622     // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)".  In
623     // this example, the RHS of the && being dead does not make the rest of the
624     // expr dead.
625     bool RHSIsLive;
626     if (Operator == tok::ampamp && LHS.Val == 0)
627       RHSIsLive = false;   // RHS of "0 && x" is dead.
628     else if (Operator == tok::pipepipe && LHS.Val != 0)
629       RHSIsLive = false;   // RHS of "1 || x" is dead.
630     else if (Operator == tok::question && LHS.Val == 0)
631       RHSIsLive = false;   // RHS (x) of "0 ? x : y" is dead.
632     else
633       RHSIsLive = ValueLive;
634 
635     // Consume the operator, remembering the operator's location for reporting.
636     SourceLocation OpLoc = PeekTok.getLocation();
637     PP.LexNonComment(PeekTok);
638 
639     PPValue RHS(LHS.getBitWidth());
640     // Parse the RHS of the operator.
641     DefinedTracker DT;
642     if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true;
643     IncludedUndefinedIds = DT.IncludedUndefinedIds;
644 
645     // Remember the precedence of this operator and get the precedence of the
646     // operator immediately to the right of the RHS.
647     unsigned ThisPrec = PeekPrec;
648     PeekPrec = getPrecedence(PeekTok.getKind());
649 
650     // If this token isn't valid, report the error.
651     if (PeekPrec == ~0U) {
652       diagnoseUnexpectedOperator(PP, RHS, PeekTok);
653       return true;
654     }
655 
656     // Decide whether to include the next binop in this subexpression.  For
657     // example, when parsing x+y*z and looking at '*', we want to recursively
658     // handle y*z as a single subexpression.  We do this because the precedence
659     // of * is higher than that of +.  The only strange case we have to handle
660     // here is for the ?: operator, where the precedence is actually lower than
661     // the LHS of the '?'.  The grammar rule is:
662     //
663     // conditional-expression ::=
664     //    logical-OR-expression ? expression : conditional-expression
665     // where 'expression' is actually comma-expression.
666     unsigned RHSPrec;
667     if (Operator == tok::question)
668       // The RHS of "?" should be maximally consumed as an expression.
669       RHSPrec = getPrecedence(tok::comma);
670     else  // All others should munch while higher precedence.
671       RHSPrec = ThisPrec+1;
672 
673     if (PeekPrec >= RHSPrec) {
674       if (EvaluateDirectiveSubExpr(RHS, RHSPrec, PeekTok, RHSIsLive,
675                                    IncludedUndefinedIds, PP))
676         return true;
677       PeekPrec = getPrecedence(PeekTok.getKind());
678     }
679     assert(PeekPrec <= ThisPrec && "Recursion didn't work!");
680 
681     // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
682     // either operand is unsigned.
683     llvm::APSInt Res(LHS.getBitWidth());
684     switch (Operator) {
685     case tok::question:       // No UAC for x and y in "x ? y : z".
686     case tok::lessless:       // Shift amount doesn't UAC with shift value.
687     case tok::greatergreater: // Shift amount doesn't UAC with shift value.
688     case tok::comma:          // Comma operands are not subject to UACs.
689     case tok::pipepipe:       // Logical || does not do UACs.
690     case tok::ampamp:         // Logical && does not do UACs.
691       break;                  // No UAC
692     default:
693       Res.setIsUnsigned(LHS.isUnsigned() || RHS.isUnsigned());
694       // If this just promoted something from signed to unsigned, and if the
695       // value was negative, warn about it.
696       if (ValueLive && Res.isUnsigned()) {
697         if (!LHS.isUnsigned() && LHS.Val.isNegative())
698           PP.Diag(OpLoc, diag::warn_pp_convert_to_positive) << 0
699             << toString(LHS.Val, 10, true) + " to " +
700                toString(LHS.Val, 10, false)
701             << LHS.getRange() << RHS.getRange();
702         if (!RHS.isUnsigned() && RHS.Val.isNegative())
703           PP.Diag(OpLoc, diag::warn_pp_convert_to_positive) << 1
704             << toString(RHS.Val, 10, true) + " to " +
705                toString(RHS.Val, 10, false)
706             << LHS.getRange() << RHS.getRange();
707       }
708       LHS.Val.setIsUnsigned(Res.isUnsigned());
709       RHS.Val.setIsUnsigned(Res.isUnsigned());
710     }
711 
712     bool Overflow = false;
713     switch (Operator) {
714     default: llvm_unreachable("Unknown operator token!");
715     case tok::percent:
716       if (RHS.Val != 0)
717         Res = LHS.Val % RHS.Val;
718       else if (ValueLive) {
719         PP.Diag(OpLoc, diag::err_pp_remainder_by_zero)
720           << LHS.getRange() << RHS.getRange();
721         return true;
722       }
723       break;
724     case tok::slash:
725       if (RHS.Val != 0) {
726         if (LHS.Val.isSigned())
727           Res = llvm::APSInt(LHS.Val.sdiv_ov(RHS.Val, Overflow), false);
728         else
729           Res = LHS.Val / RHS.Val;
730       } else if (ValueLive) {
731         PP.Diag(OpLoc, diag::err_pp_division_by_zero)
732           << LHS.getRange() << RHS.getRange();
733         return true;
734       }
735       break;
736 
737     case tok::star:
738       if (Res.isSigned())
739         Res = llvm::APSInt(LHS.Val.smul_ov(RHS.Val, Overflow), false);
740       else
741         Res = LHS.Val * RHS.Val;
742       break;
743     case tok::lessless: {
744       // Determine whether overflow is about to happen.
745       if (LHS.isUnsigned())
746         Res = LHS.Val.ushl_ov(RHS.Val, Overflow);
747       else
748         Res = llvm::APSInt(LHS.Val.sshl_ov(RHS.Val, Overflow), false);
749       break;
750     }
751     case tok::greatergreater: {
752       // Determine whether overflow is about to happen.
753       unsigned ShAmt = static_cast<unsigned>(RHS.Val.getLimitedValue());
754       if (ShAmt >= LHS.getBitWidth()) {
755         Overflow = true;
756         ShAmt = LHS.getBitWidth()-1;
757       }
758       Res = LHS.Val >> ShAmt;
759       break;
760     }
761     case tok::plus:
762       if (LHS.isUnsigned())
763         Res = LHS.Val + RHS.Val;
764       else
765         Res = llvm::APSInt(LHS.Val.sadd_ov(RHS.Val, Overflow), false);
766       break;
767     case tok::minus:
768       if (LHS.isUnsigned())
769         Res = LHS.Val - RHS.Val;
770       else
771         Res = llvm::APSInt(LHS.Val.ssub_ov(RHS.Val, Overflow), false);
772       break;
773     case tok::lessequal:
774       Res = LHS.Val <= RHS.Val;
775       Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
776       break;
777     case tok::less:
778       Res = LHS.Val < RHS.Val;
779       Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
780       break;
781     case tok::greaterequal:
782       Res = LHS.Val >= RHS.Val;
783       Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
784       break;
785     case tok::greater:
786       Res = LHS.Val > RHS.Val;
787       Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
788       break;
789     case tok::exclaimequal:
790       Res = LHS.Val != RHS.Val;
791       Res.setIsUnsigned(false);  // C99 6.5.9p3, result is always int (signed)
792       break;
793     case tok::equalequal:
794       Res = LHS.Val == RHS.Val;
795       Res.setIsUnsigned(false);  // C99 6.5.9p3, result is always int (signed)
796       break;
797     case tok::amp:
798       Res = LHS.Val & RHS.Val;
799       break;
800     case tok::caret:
801       Res = LHS.Val ^ RHS.Val;
802       break;
803     case tok::pipe:
804       Res = LHS.Val | RHS.Val;
805       break;
806     case tok::ampamp:
807       Res = (LHS.Val != 0 && RHS.Val != 0);
808       Res.setIsUnsigned(false);  // C99 6.5.13p3, result is always int (signed)
809       break;
810     case tok::pipepipe:
811       Res = (LHS.Val != 0 || RHS.Val != 0);
812       Res.setIsUnsigned(false);  // C99 6.5.14p3, result is always int (signed)
813       break;
814     case tok::comma:
815       // Comma is invalid in pp expressions in c89/c++ mode, but is valid in C99
816       // if not being evaluated.
817       if (!PP.getLangOpts().C99 || ValueLive)
818         PP.Diag(OpLoc, diag::ext_pp_comma_expr)
819           << LHS.getRange() << RHS.getRange();
820       Res = RHS.Val; // LHS = LHS,RHS -> RHS.
821       break;
822     case tok::question: {
823       // Parse the : part of the expression.
824       if (PeekTok.isNot(tok::colon)) {
825         PP.Diag(PeekTok.getLocation(), diag::err_expected)
826             << tok::colon << LHS.getRange() << RHS.getRange();
827         PP.Diag(OpLoc, diag::note_matching) << tok::question;
828         return true;
829       }
830       // Consume the :.
831       PP.LexNonComment(PeekTok);
832 
833       // Evaluate the value after the :.
834       bool AfterColonLive = ValueLive && LHS.Val == 0;
835       PPValue AfterColonVal(LHS.getBitWidth());
836       DefinedTracker DT;
837       if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP))
838         return true;
839 
840       // Parse anything after the : with the same precedence as ?.  We allow
841       // things of equal precedence because ?: is right associative.
842       if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec,
843                                    PeekTok, AfterColonLive,
844                                    IncludedUndefinedIds, PP))
845         return true;
846 
847       // Now that we have the condition, the LHS and the RHS of the :, evaluate.
848       Res = LHS.Val != 0 ? RHS.Val : AfterColonVal.Val;
849       RHS.setEnd(AfterColonVal.getRange().getEnd());
850 
851       // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
852       // either operand is unsigned.
853       Res.setIsUnsigned(RHS.isUnsigned() || AfterColonVal.isUnsigned());
854 
855       // Figure out the precedence of the token after the : part.
856       PeekPrec = getPrecedence(PeekTok.getKind());
857       break;
858     }
859     case tok::colon:
860       // Don't allow :'s to float around without being part of ?: exprs.
861       PP.Diag(OpLoc, diag::err_pp_colon_without_question)
862         << LHS.getRange() << RHS.getRange();
863       return true;
864     }
865 
866     // If this operator is live and overflowed, report the issue.
867     if (Overflow && ValueLive)
868       PP.Diag(OpLoc, diag::warn_pp_expr_overflow)
869         << LHS.getRange() << RHS.getRange();
870 
871     // Put the result back into 'LHS' for our next iteration.
872     LHS.Val = Res;
873     LHS.setEnd(RHS.getRange().getEnd());
874     RHS.setIdentifier(nullptr);
875   }
876 }
877 
878 /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
879 /// may occur after a #if or #elif directive.  If the expression is equivalent
880 /// to "!defined(X)" return X in IfNDefMacro.
881 Preprocessor::DirectiveEvalResult
EvaluateDirectiveExpression(IdentifierInfo * & IfNDefMacro,Token & Tok,bool & EvaluatedDefined,bool CheckForEoD)882 Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
883                                           Token &Tok, bool &EvaluatedDefined,
884                                           bool CheckForEoD) {
885   SaveAndRestore PPDir(ParsingIfOrElifDirective, true);
886   // Save the current state of 'DisableMacroExpansion' and reset it to false. If
887   // 'DisableMacroExpansion' is true, then we must be in a macro argument list
888   // in which case a directive is undefined behavior.  We want macros to be able
889   // to recursively expand in order to get more gcc-list behavior, so we force
890   // DisableMacroExpansion to false and restore it when we're done parsing the
891   // expression.
892   bool DisableMacroExpansionAtStartOfDirective = DisableMacroExpansion;
893   DisableMacroExpansion = false;
894 
895   // Peek ahead one token.
896   LexNonComment(Tok);
897 
898   // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
899   unsigned BitWidth = getTargetInfo().getIntMaxTWidth();
900 
901   PPValue ResVal(BitWidth);
902   DefinedTracker DT;
903   SourceLocation ExprStartLoc = SourceMgr.getExpansionLoc(Tok.getLocation());
904   if (EvaluateValue(ResVal, Tok, DT, true, *this)) {
905     // Parse error, skip the rest of the macro line.
906     if (Tok.isNot(tok::eod))
907       DiscardUntilEndOfDirective(Tok);
908 
909     // Restore 'DisableMacroExpansion'.
910     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
911 
912     // We cannot trust the source range from the value because there was a
913     // parse error. Track the range manually -- the end of the directive is the
914     // end of the condition range.
915     return {std::nullopt,
916             false,
917             DT.IncludedUndefinedIds,
918             {ExprStartLoc, Tok.getLocation()}};
919   }
920 
921   EvaluatedDefined = DT.State != DefinedTracker::Unknown;
922 
923   // If we are at the end of the expression after just parsing a value, there
924   // must be no (unparenthesized) binary operators involved, so we can exit
925   // directly.
926   if (Tok.is(tok::eod)) {
927     // If the expression we parsed was of the form !defined(macro), return the
928     // macro in IfNDefMacro.
929     if (DT.State == DefinedTracker::NotDefinedMacro)
930       IfNDefMacro = DT.TheMacro;
931 
932     // Restore 'DisableMacroExpansion'.
933     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
934     bool IsNonZero = ResVal.Val != 0;
935     SourceRange ValRange = ResVal.getRange();
936     return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds,
937             ValRange};
938   }
939 
940   // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
941   // operator and the stuff after it.
942   if (EvaluateDirectiveSubExpr(ResVal, getPrecedence(tok::question),
943                                Tok, true, DT.IncludedUndefinedIds, *this)) {
944     // Parse error, skip the rest of the macro line.
945     if (Tok.isNot(tok::eod))
946       DiscardUntilEndOfDirective(Tok);
947 
948     // Restore 'DisableMacroExpansion'.
949     DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
950     return {std::nullopt,
951             false,
952             DT.IncludedUndefinedIds,
953             {ExprStartLoc, Tok.getLocation()}};
954   }
955 
956   if (CheckForEoD) {
957     // If we aren't at the tok::eod token, something bad happened, like an extra
958     // ')' token.
959     if (Tok.isNot(tok::eod)) {
960       Diag(Tok, diag::err_pp_expected_eol);
961       DiscardUntilEndOfDirective(Tok);
962     }
963   }
964 
965   EvaluatedDefined = EvaluatedDefined || DT.State != DefinedTracker::Unknown;
966 
967   // Restore 'DisableMacroExpansion'.
968   DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
969   bool IsNonZero = ResVal.Val != 0;
970   SourceRange ValRange = ResVal.getRange();
971   return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange};
972 }
973 
974 Preprocessor::DirectiveEvalResult
EvaluateDirectiveExpression(IdentifierInfo * & IfNDefMacro,bool CheckForEoD)975 Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
976                                           bool CheckForEoD) {
977   Token Tok;
978   bool EvaluatedDefined;
979   return EvaluateDirectiveExpression(IfNDefMacro, Tok, EvaluatedDefined,
980                                      CheckForEoD);
981 }
982 
983 static std::optional<CXXStandardLibraryVersionInfo>
getCXXStandardLibraryVersion(Preprocessor & PP,StringRef MacroName,CXXStandardLibraryVersionInfo::Library Lib)984 getCXXStandardLibraryVersion(Preprocessor &PP, StringRef MacroName,
985                              CXXStandardLibraryVersionInfo::Library Lib) {
986   MacroInfo *Macro = PP.getMacroInfo(PP.getIdentifierInfo(MacroName));
987   if (!Macro || Macro->getNumTokens() != 1 || !Macro->isObjectLike())
988     return std::nullopt;
989 
990   const Token &RevisionDateTok = Macro->getReplacementToken(0);
991 
992   bool Invalid = false;
993   llvm::SmallVector<char, 10> Buffer;
994   llvm::StringRef RevisionDate =
995       PP.getSpelling(RevisionDateTok, Buffer, &Invalid);
996   if (!Invalid) {
997     std::uint64_t Value;
998     // We don't use NumericParser to avoid diagnostics
999     if (!RevisionDate.consumeInteger(10, Value))
1000       return CXXStandardLibraryVersionInfo{Lib, Value};
1001   }
1002   return CXXStandardLibraryVersionInfo{CXXStandardLibraryVersionInfo::Unknown,
1003                                        0};
1004 }
1005 
getStdLibCxxVersion()1006 std::optional<uint64_t> Preprocessor::getStdLibCxxVersion() {
1007   if (!CXXStandardLibraryVersion)
1008     CXXStandardLibraryVersion = getCXXStandardLibraryVersion(
1009         *this, "__GLIBCXX__", CXXStandardLibraryVersionInfo::LibStdCXX);
1010   if (!CXXStandardLibraryVersion)
1011     return std::nullopt;
1012 
1013   if (CXXStandardLibraryVersion->Lib ==
1014       CXXStandardLibraryVersionInfo::LibStdCXX)
1015     return CXXStandardLibraryVersion->Version;
1016   return std::nullopt;
1017 }
1018 
NeedsStdLibCxxWorkaroundBefore(uint64_t FixedVersion)1019 bool Preprocessor::NeedsStdLibCxxWorkaroundBefore(uint64_t FixedVersion) {
1020   assert(FixedVersion >= 2000'00'00 && FixedVersion <= 2100'00'00 &&
1021          "invalid value for __GLIBCXX__");
1022   std::optional<std::uint64_t> Ver = getStdLibCxxVersion();
1023   if (!Ver)
1024     return false;
1025   return *Ver < FixedVersion;
1026 }
1027