1 //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer 11 /// literal separators. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "IntegerLiteralSeparatorFixer.h" 16 17 namespace clang { 18 namespace format { 19 20 enum class Base { Binary, Decimal, Hex, Other }; 21 22 static Base getBase(const StringRef IntegerLiteral) { 23 assert(IntegerLiteral.size() > 1); 24 25 if (IntegerLiteral[0] > '0') { 26 assert(IntegerLiteral[0] <= '9'); 27 return Base::Decimal; 28 } 29 30 assert(IntegerLiteral[0] == '0'); 31 32 switch (IntegerLiteral[1]) { 33 case 'b': 34 case 'B': 35 return Base::Binary; 36 case 'x': 37 case 'X': 38 return Base::Hex; 39 default: 40 return Base::Other; 41 } 42 } 43 44 std::pair<tooling::Replacements, unsigned> 45 IntegerLiteralSeparatorFixer::process(const Environment &Env, 46 const FormatStyle &Style) { 47 switch (Style.Language) { 48 case FormatStyle::LK_Cpp: 49 case FormatStyle::LK_ObjC: 50 Separator = '\''; 51 break; 52 case FormatStyle::LK_CSharp: 53 case FormatStyle::LK_Java: 54 case FormatStyle::LK_JavaScript: 55 Separator = '_'; 56 break; 57 default: 58 return {}; 59 } 60 61 const auto &Option = Style.IntegerLiteralSeparator; 62 const auto Binary = Option.Binary; 63 const auto Decimal = Option.Decimal; 64 const auto Hex = Option.Hex; 65 const bool SkipBinary = Binary == 0; 66 const bool SkipDecimal = Decimal == 0; 67 const bool SkipHex = Hex == 0; 68 69 if (SkipBinary && SkipDecimal && SkipHex) 70 return {}; 71 72 const auto BinaryMinDigits = 73 std::max((int)Option.BinaryMinDigits, Binary + 1); 74 const auto DecimalMinDigits = 75 std::max((int)Option.DecimalMinDigits, Decimal + 1); 76 const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1); 77 78 const auto &SourceMgr = Env.getSourceManager(); 79 AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); 80 81 const auto ID = Env.getFileID(); 82 const auto LangOpts = getFormattingLangOpts(Style); 83 Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); 84 Lex.SetCommentRetentionState(true); 85 86 Token Tok; 87 tooling::Replacements Result; 88 89 for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { 90 auto Length = Tok.getLength(); 91 if (Length < 2) 92 continue; 93 auto Location = Tok.getLocation(); 94 auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); 95 if (Tok.is(tok::comment)) { 96 if (Text == "// clang-format off" || Text == "/* clang-format off */") 97 Skip = true; 98 else if (Text == "// clang-format on" || Text == "/* clang-format on */") 99 Skip = false; 100 continue; 101 } 102 if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' || 103 !AffectedRangeMgr.affectsCharSourceRange( 104 CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { 105 continue; 106 } 107 const auto B = getBase(Text); 108 const bool IsBase2 = B == Base::Binary; 109 const bool IsBase10 = B == Base::Decimal; 110 const bool IsBase16 = B == Base::Hex; 111 if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || 112 (IsBase16 && SkipHex) || B == Base::Other) { 113 continue; 114 } 115 if (Style.isCpp()) { 116 if (const auto Pos = Text.find_first_of("_i"); Pos != StringRef::npos) { 117 Text = Text.substr(0, Pos); 118 Length = Pos; 119 } 120 } 121 if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) || 122 (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) { 123 continue; 124 } 125 const auto Start = Text[0] == '0' ? 2 : 0; 126 auto End = Text.find_first_of("uUlLzZn", Start); 127 if (End == StringRef::npos) 128 End = Length; 129 if (Start > 0 || End < Length) { 130 Length = End - Start; 131 Text = Text.substr(Start, Length); 132 } 133 auto DigitsPerGroup = Decimal; 134 auto MinDigits = DecimalMinDigits; 135 if (IsBase2) { 136 DigitsPerGroup = Binary; 137 MinDigits = BinaryMinDigits; 138 } else if (IsBase16) { 139 DigitsPerGroup = Hex; 140 MinDigits = HexMinDigits; 141 } 142 const auto SeparatorCount = Text.count(Separator); 143 const int DigitCount = Length - SeparatorCount; 144 const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits; 145 if (RemoveSeparator && SeparatorCount == 0) 146 continue; 147 if (!RemoveSeparator && SeparatorCount > 0 && 148 checkSeparator(Text, DigitsPerGroup)) { 149 continue; 150 } 151 const auto &Formatted = 152 format(Text, DigitsPerGroup, DigitCount, RemoveSeparator); 153 assert(Formatted != Text); 154 if (Start > 0) 155 Location = Location.getLocWithOffset(Start); 156 cantFail(Result.add( 157 tooling::Replacement(SourceMgr, Location, Length, Formatted))); 158 } 159 160 return {Result, 0}; 161 } 162 163 bool IntegerLiteralSeparatorFixer::checkSeparator( 164 const StringRef IntegerLiteral, int DigitsPerGroup) const { 165 assert(DigitsPerGroup > 0); 166 167 int I = 0; 168 for (auto C : llvm::reverse(IntegerLiteral)) { 169 if (C == Separator) { 170 if (I < DigitsPerGroup) 171 return false; 172 I = 0; 173 } else { 174 if (I == DigitsPerGroup) 175 return false; 176 ++I; 177 } 178 } 179 180 return true; 181 } 182 183 std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, 184 int DigitsPerGroup, 185 int DigitCount, 186 bool RemoveSeparator) const { 187 assert(DigitsPerGroup != 0); 188 189 std::string Formatted; 190 191 if (RemoveSeparator) { 192 for (auto C : IntegerLiteral) 193 if (C != Separator) 194 Formatted.push_back(C); 195 return Formatted; 196 } 197 198 int Remainder = DigitCount % DigitsPerGroup; 199 200 int I = 0; 201 for (auto C : IntegerLiteral) { 202 if (C == Separator) 203 continue; 204 if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { 205 Formatted.push_back(Separator); 206 I = 0; 207 Remainder = 0; 208 } 209 Formatted.push_back(C); 210 ++I; 211 } 212 213 return Formatted; 214 } 215 216 } // namespace format 217 } // namespace clang 218