xref: /freebsd/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
11 /// literal separators.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "IntegerLiteralSeparatorFixer.h"
16 
17 namespace clang {
18 namespace format {
19 
/// Numeric base of an integer literal, as classified by getBase().
enum class Base {
  Binary,  ///< Literal starts with 0b or 0B.
  Decimal, ///< Literal starts with a non-zero digit.
  Hex,     ///< Literal starts with 0x or 0X.
  Other    ///< Any other literal that starts with 0.
};
21 
getBase(const StringRef IntegerLiteral)22 static Base getBase(const StringRef IntegerLiteral) {
23   assert(IntegerLiteral.size() > 1);
24 
25   if (IntegerLiteral[0] > '0') {
26     assert(IntegerLiteral[0] <= '9');
27     return Base::Decimal;
28   }
29 
30   assert(IntegerLiteral[0] == '0');
31 
32   switch (IntegerLiteral[1]) {
33   case 'b':
34   case 'B':
35     return Base::Binary;
36   case 'x':
37   case 'X':
38     return Base::Hex;
39   default:
40     return Base::Other;
41   }
42 }
43 
44 std::pair<tooling::Replacements, unsigned>
process(const Environment & Env,const FormatStyle & Style)45 IntegerLiteralSeparatorFixer::process(const Environment &Env,
46                                       const FormatStyle &Style) {
47   switch (Style.Language) {
48   case FormatStyle::LK_CSharp:
49   case FormatStyle::LK_Java:
50   case FormatStyle::LK_JavaScript:
51     Separator = '_';
52     break;
53   case FormatStyle::LK_Cpp:
54   case FormatStyle::LK_ObjC:
55     if (Style.Standard >= FormatStyle::LS_Cpp14) {
56       Separator = '\'';
57       break;
58     }
59     [[fallthrough]];
60   default:
61     return {};
62   }
63 
64   const auto &Option = Style.IntegerLiteralSeparator;
65   const auto Binary = Option.Binary;
66   const auto Decimal = Option.Decimal;
67   const auto Hex = Option.Hex;
68   const bool SkipBinary = Binary == 0;
69   const bool SkipDecimal = Decimal == 0;
70   const bool SkipHex = Hex == 0;
71 
72   if (SkipBinary && SkipDecimal && SkipHex)
73     return {};
74 
75   const auto BinaryMinDigits =
76       std::max((int)Option.BinaryMinDigits, Binary + 1);
77   const auto DecimalMinDigits =
78       std::max((int)Option.DecimalMinDigits, Decimal + 1);
79   const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
80 
81   const auto &SourceMgr = Env.getSourceManager();
82   AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
83 
84   const auto ID = Env.getFileID();
85   const auto LangOpts = getFormattingLangOpts(Style);
86   Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
87   Lex.SetCommentRetentionState(true);
88 
89   Token Tok;
90   tooling::Replacements Result;
91 
92   for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
93     auto Length = Tok.getLength();
94     if (Length < 2)
95       continue;
96     auto Location = Tok.getLocation();
97     auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
98     if (Tok.is(tok::comment)) {
99       if (isClangFormatOff(Text))
100         Skip = true;
101       else if (isClangFormatOn(Text))
102         Skip = false;
103       continue;
104     }
105     if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
106         !AffectedRangeMgr.affectsCharSourceRange(
107             CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
108       continue;
109     }
110     const auto B = getBase(Text);
111     const bool IsBase2 = B == Base::Binary;
112     const bool IsBase10 = B == Base::Decimal;
113     const bool IsBase16 = B == Base::Hex;
114     if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
115         (IsBase16 && SkipHex) || B == Base::Other) {
116       continue;
117     }
118     if (Style.isCpp()) {
119       // Hex alpha digits a-f/A-F must be at the end of the string literal.
120       StringRef Suffixes = "_himnsuyd";
121       if (const auto Pos =
122               Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
123           Pos != StringRef::npos) {
124         Text = Text.substr(0, Pos);
125         Length = Pos;
126       }
127     }
128     if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
129         (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
130       continue;
131     }
132     const auto Start = Text[0] == '0' ? 2 : 0;
133     auto End = Text.find_first_of("uUlLzZn", Start);
134     if (End == StringRef::npos)
135       End = Length;
136     if (Start > 0 || End < Length) {
137       Length = End - Start;
138       Text = Text.substr(Start, Length);
139     }
140     auto DigitsPerGroup = Decimal;
141     auto MinDigits = DecimalMinDigits;
142     if (IsBase2) {
143       DigitsPerGroup = Binary;
144       MinDigits = BinaryMinDigits;
145     } else if (IsBase16) {
146       DigitsPerGroup = Hex;
147       MinDigits = HexMinDigits;
148     }
149     const auto SeparatorCount = Text.count(Separator);
150     const int DigitCount = Length - SeparatorCount;
151     const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
152     if (RemoveSeparator && SeparatorCount == 0)
153       continue;
154     if (!RemoveSeparator && SeparatorCount > 0 &&
155         checkSeparator(Text, DigitsPerGroup)) {
156       continue;
157     }
158     const auto &Formatted =
159         format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
160     assert(Formatted != Text);
161     if (Start > 0)
162       Location = Location.getLocWithOffset(Start);
163     cantFail(Result.add(
164         tooling::Replacement(SourceMgr, Location, Length, Formatted)));
165   }
166 
167   return {Result, 0};
168 }
169 
checkSeparator(const StringRef IntegerLiteral,int DigitsPerGroup) const170 bool IntegerLiteralSeparatorFixer::checkSeparator(
171     const StringRef IntegerLiteral, int DigitsPerGroup) const {
172   assert(DigitsPerGroup > 0);
173 
174   int I = 0;
175   for (auto C : llvm::reverse(IntegerLiteral)) {
176     if (C == Separator) {
177       if (I < DigitsPerGroup)
178         return false;
179       I = 0;
180     } else {
181       if (I == DigitsPerGroup)
182         return false;
183       ++I;
184     }
185   }
186 
187   return true;
188 }
189 
format(const StringRef IntegerLiteral,int DigitsPerGroup,int DigitCount,bool RemoveSeparator) const190 std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
191                                                  int DigitsPerGroup,
192                                                  int DigitCount,
193                                                  bool RemoveSeparator) const {
194   assert(DigitsPerGroup != 0);
195 
196   std::string Formatted;
197 
198   if (RemoveSeparator) {
199     for (auto C : IntegerLiteral)
200       if (C != Separator)
201         Formatted.push_back(C);
202     return Formatted;
203   }
204 
205   int Remainder = DigitCount % DigitsPerGroup;
206 
207   int I = 0;
208   for (auto C : IntegerLiteral) {
209     if (C == Separator)
210       continue;
211     if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
212       Formatted.push_back(Separator);
213       I = 0;
214       Remainder = 0;
215     }
216     Formatted.push_back(C);
217     ++I;
218   }
219 
220   return Formatted;
221 }
222 
223 } // namespace format
224 } // namespace clang
225