xref: /freebsd/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp (revision 9e7101a856ad738879b0bde099bfb2ba08b7995c)
1 //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
11 /// literal separators.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "IntegerLiteralSeparatorFixer.h"
16 
17 namespace clang {
18 namespace format {
19 
20 enum class Base { Binary, Decimal, Hex, Other };
21 
22 static Base getBase(const StringRef IntegerLiteral) {
23   assert(IntegerLiteral.size() > 1);
24 
25   if (IntegerLiteral[0] > '0') {
26     assert(IntegerLiteral[0] <= '9');
27     return Base::Decimal;
28   }
29 
30   assert(IntegerLiteral[0] == '0');
31 
32   switch (IntegerLiteral[1]) {
33   case 'b':
34   case 'B':
35     return Base::Binary;
36   case 'x':
37   case 'X':
38     return Base::Hex;
39   default:
40     return Base::Other;
41   }
42 }
43 
44 std::pair<tooling::Replacements, unsigned>
45 IntegerLiteralSeparatorFixer::process(const Environment &Env,
46                                       const FormatStyle &Style) {
47   switch (Style.Language) {
48   case FormatStyle::LK_Cpp:
49   case FormatStyle::LK_ObjC:
50     Separator = '\'';
51     break;
52   case FormatStyle::LK_CSharp:
53   case FormatStyle::LK_Java:
54   case FormatStyle::LK_JavaScript:
55     Separator = '_';
56     break;
57   default:
58     return {};
59   }
60 
61   const auto &Option = Style.IntegerLiteralSeparator;
62   const auto Binary = Option.Binary;
63   const auto Decimal = Option.Decimal;
64   const auto Hex = Option.Hex;
65   const bool SkipBinary = Binary == 0;
66   const bool SkipDecimal = Decimal == 0;
67   const bool SkipHex = Hex == 0;
68 
69   if (SkipBinary && SkipDecimal && SkipHex)
70     return {};
71 
72   const auto BinaryMinDigits =
73       std::max((int)Option.BinaryMinDigits, Binary + 1);
74   const auto DecimalMinDigits =
75       std::max((int)Option.DecimalMinDigits, Decimal + 1);
76   const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
77 
78   const auto &SourceMgr = Env.getSourceManager();
79   AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
80 
81   const auto ID = Env.getFileID();
82   const auto LangOpts = getFormattingLangOpts(Style);
83   Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
84   Lex.SetCommentRetentionState(true);
85 
86   Token Tok;
87   tooling::Replacements Result;
88 
89   for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
90     auto Length = Tok.getLength();
91     if (Length < 2)
92       continue;
93     auto Location = Tok.getLocation();
94     auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
95     if (Tok.is(tok::comment)) {
96       if (Text == "// clang-format off" || Text == "/* clang-format off */")
97         Skip = true;
98       else if (Text == "// clang-format on" || Text == "/* clang-format on */")
99         Skip = false;
100       continue;
101     }
102     if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
103         !AffectedRangeMgr.affectsCharSourceRange(
104             CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
105       continue;
106     }
107     const auto B = getBase(Text);
108     const bool IsBase2 = B == Base::Binary;
109     const bool IsBase10 = B == Base::Decimal;
110     const bool IsBase16 = B == Base::Hex;
111     if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
112         (IsBase16 && SkipHex) || B == Base::Other) {
113       continue;
114     }
115     if (Style.isCpp()) {
116       if (const auto Pos = Text.find_first_of("_i"); Pos != StringRef::npos) {
117         Text = Text.substr(0, Pos);
118         Length = Pos;
119       }
120     }
121     if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
122         (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
123       continue;
124     }
125     const auto Start = Text[0] == '0' ? 2 : 0;
126     auto End = Text.find_first_of("uUlLzZn", Start);
127     if (End == StringRef::npos)
128       End = Length;
129     if (Start > 0 || End < Length) {
130       Length = End - Start;
131       Text = Text.substr(Start, Length);
132     }
133     auto DigitsPerGroup = Decimal;
134     auto MinDigits = DecimalMinDigits;
135     if (IsBase2) {
136       DigitsPerGroup = Binary;
137       MinDigits = BinaryMinDigits;
138     } else if (IsBase16) {
139       DigitsPerGroup = Hex;
140       MinDigits = HexMinDigits;
141     }
142     const auto SeparatorCount = Text.count(Separator);
143     const int DigitCount = Length - SeparatorCount;
144     const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
145     if (RemoveSeparator && SeparatorCount == 0)
146       continue;
147     if (!RemoveSeparator && SeparatorCount > 0 &&
148         checkSeparator(Text, DigitsPerGroup)) {
149       continue;
150     }
151     const auto &Formatted =
152         format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
153     assert(Formatted != Text);
154     if (Start > 0)
155       Location = Location.getLocWithOffset(Start);
156     cantFail(Result.add(
157         tooling::Replacement(SourceMgr, Location, Length, Formatted)));
158   }
159 
160   return {Result, 0};
161 }
162 
163 bool IntegerLiteralSeparatorFixer::checkSeparator(
164     const StringRef IntegerLiteral, int DigitsPerGroup) const {
165   assert(DigitsPerGroup > 0);
166 
167   int I = 0;
168   for (auto C : llvm::reverse(IntegerLiteral)) {
169     if (C == Separator) {
170       if (I < DigitsPerGroup)
171         return false;
172       I = 0;
173     } else {
174       if (I == DigitsPerGroup)
175         return false;
176       ++I;
177     }
178   }
179 
180   return true;
181 }
182 
183 std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
184                                                  int DigitsPerGroup,
185                                                  int DigitCount,
186                                                  bool RemoveSeparator) const {
187   assert(DigitsPerGroup != 0);
188 
189   std::string Formatted;
190 
191   if (RemoveSeparator) {
192     for (auto C : IntegerLiteral)
193       if (C != Separator)
194         Formatted.push_back(C);
195     return Formatted;
196   }
197 
198   int Remainder = DigitCount % DigitsPerGroup;
199 
200   int I = 0;
201   for (auto C : IntegerLiteral) {
202     if (C == Separator)
203       continue;
204     if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
205       Formatted.push_back(Separator);
206       I = 0;
207       Remainder = 0;
208     }
209     Formatted.push_back(C);
210     ++I;
211   }
212 
213   return Formatted;
214 }
215 
216 } // namespace format
217 } // namespace clang
218