1 //===-- clang-format/ClangFormat.cpp - Clang format tool ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a clang-format tool that automatically formats 11 /// (fragments of) C++ code. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Basic/Diagnostic.h" 16 #include "clang/Basic/DiagnosticOptions.h" 17 #include "clang/Basic/FileManager.h" 18 #include "clang/Basic/SourceManager.h" 19 #include "clang/Basic/Version.h" 20 #include "clang/Format/Format.h" 21 #include "clang/Rewrite/Core/Rewriter.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/InitLLVM.h" 25 #include "llvm/Support/Process.h" 26 27 using namespace llvm; 28 using clang::tooling::Replacements; 29 30 static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden); 31 32 // Mark all our options with this category, everything else (except for -version 33 // and -help) will be hidden. 34 static cl::OptionCategory ClangFormatCategory("Clang-format options"); 35 36 static cl::list<unsigned> 37 Offsets("offset", 38 cl::desc("Format a range starting at this byte offset.\n" 39 "Multiple ranges can be formatted by specifying\n" 40 "several -offset and -length pairs.\n" 41 "Can only be used with one input file."), 42 cl::cat(ClangFormatCategory)); 43 static cl::list<unsigned> 44 Lengths("length", 45 cl::desc("Format a range of this length (in bytes).\n" 46 "Multiple ranges can be formatted by specifying\n" 47 "several -offset and -length pairs.\n" 48 "When only a single -offset is specified without\n" 49 "-length, clang-format will format up to the end\n" 50 "of the file.\n" 51 "Can only be used with one input file."), 52 cl::cat(ClangFormatCategory)); 53 static cl::list<std::string> 54 LineRanges("lines", cl::desc("<start line>:<end line> - format a range of\n" 55 "lines (both 1-based).\n" 56 "Multiple ranges can be formatted by specifying\n" 57 "several -lines arguments.\n" 58 "Can't be used with -offset and -length.\n" 59 "Can only be used with one input file."), 60 cl::cat(ClangFormatCategory)); 61 static cl::opt<std::string> 62 Style("style", cl::desc(clang::format::StyleOptionHelpDescription), 63 cl::init(clang::format::DefaultFormatStyle), 64 cl::cat(ClangFormatCategory)); 65 static cl::opt<std::string> 66 FallbackStyle("fallback-style", 67 cl::desc("The name of the predefined style used as a\n" 68 "fallback in case clang-format is invoked with\n" 69 "-style=file, but can not find the .clang-format\n" 70 "file to use.\n" 71 "Use -fallback-style=none to skip formatting."), 72 cl::init(clang::format::DefaultFallbackStyle), 73 cl::cat(ClangFormatCategory)); 74 75 static cl::opt<std::string> 76 AssumeFileName("assume-filename", 77 cl::desc("When reading from stdin, clang-format assumes this\n" 78 "filename to look for a style config file (with\n" 79 "-style=file) and to determine the language."), 80 cl::init("<stdin>"), cl::cat(ClangFormatCategory)); 81 82 static cl::opt<bool> Inplace("i", 83 cl::desc("Inplace edit <file>s, if specified."), 84 cl::cat(ClangFormatCategory)); 85 86 static cl::opt<bool> OutputXML("output-replacements-xml", 87 cl::desc("Output replacements as XML."), 88 cl::cat(ClangFormatCategory)); 89 static cl::opt<bool> 90 DumpConfig("dump-config", 91 cl::desc("Dump configuration options to stdout and exit.\n" 92 "Can be used with -style option."), 93 cl::cat(ClangFormatCategory)); 94 static cl::opt<unsigned> 95 Cursor("cursor", 96 cl::desc("The position of the cursor when invoking\n" 97 "clang-format from an editor integration"), 98 cl::init(0), cl::cat(ClangFormatCategory)); 99 100 static cl::opt<bool> SortIncludes( 101 "sort-includes", 102 cl::desc("If set, overrides the include sorting behavior determined by the " 103 "SortIncludes style flag"), 104 cl::cat(ClangFormatCategory)); 105 106 static cl::opt<bool> 107 Verbose("verbose", cl::desc("If set, shows the list of processed files"), 108 cl::cat(ClangFormatCategory)); 109 110 static cl::list<std::string> FileNames(cl::Positional, cl::desc("[<file> ...]"), 111 cl::cat(ClangFormatCategory)); 112 113 namespace clang { 114 namespace format { 115 116 static FileID createInMemoryFile(StringRef FileName, MemoryBuffer *Source, 117 SourceManager &Sources, FileManager &Files, 118 llvm::vfs::InMemoryFileSystem *MemFS) { 119 MemFS->addFileNoOwn(FileName, 0, Source); 120 return Sources.createFileID(Files.getFile(FileName), SourceLocation(), 121 SrcMgr::C_User); 122 } 123 124 // Parses <start line>:<end line> input to a pair of line numbers. 125 // Returns true on error. 126 static bool parseLineRange(StringRef Input, unsigned &FromLine, 127 unsigned &ToLine) { 128 std::pair<StringRef, StringRef> LineRange = Input.split(':'); 129 return LineRange.first.getAsInteger(0, FromLine) || 130 LineRange.second.getAsInteger(0, ToLine); 131 } 132 133 static bool fillRanges(MemoryBuffer *Code, 134 std::vector<tooling::Range> &Ranges) { 135 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem( 136 new llvm::vfs::InMemoryFileSystem); 137 FileManager Files(FileSystemOptions(), InMemoryFileSystem); 138 DiagnosticsEngine Diagnostics( 139 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 140 new DiagnosticOptions); 141 SourceManager Sources(Diagnostics, Files); 142 FileID ID = createInMemoryFile("<irrelevant>", Code, Sources, Files, 143 InMemoryFileSystem.get()); 144 if (!LineRanges.empty()) { 145 if (!Offsets.empty() || !Lengths.empty()) { 146 errs() << "error: cannot use -lines with -offset/-length\n"; 147 return true; 148 } 149 150 for (unsigned i = 0, e = LineRanges.size(); i < e; ++i) { 151 unsigned FromLine, ToLine; 152 if (parseLineRange(LineRanges[i], FromLine, ToLine)) { 153 errs() << "error: invalid <start line>:<end line> pair\n"; 154 return true; 155 } 156 if (FromLine > ToLine) { 157 errs() << "error: start line should be less than end line\n"; 158 return true; 159 } 160 SourceLocation Start = Sources.translateLineCol(ID, FromLine, 1); 161 SourceLocation End = Sources.translateLineCol(ID, ToLine, UINT_MAX); 162 if (Start.isInvalid() || End.isInvalid()) 163 return true; 164 unsigned Offset = Sources.getFileOffset(Start); 165 unsigned Length = Sources.getFileOffset(End) - Offset; 166 Ranges.push_back(tooling::Range(Offset, Length)); 167 } 168 return false; 169 } 170 171 if (Offsets.empty()) 172 Offsets.push_back(0); 173 if (Offsets.size() != Lengths.size() && 174 !(Offsets.size() == 1 && Lengths.empty())) { 175 errs() << "error: number of -offset and -length arguments must match.\n"; 176 return true; 177 } 178 for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { 179 if (Offsets[i] >= Code->getBufferSize()) { 180 errs() << "error: offset " << Offsets[i] << " is outside the file\n"; 181 return true; 182 } 183 SourceLocation Start = 184 Sources.getLocForStartOfFile(ID).getLocWithOffset(Offsets[i]); 185 SourceLocation End; 186 if (i < Lengths.size()) { 187 if (Offsets[i] + Lengths[i] > Code->getBufferSize()) { 188 errs() << "error: invalid length " << Lengths[i] 189 << ", offset + length (" << Offsets[i] + Lengths[i] 190 << ") is outside the file.\n"; 191 return true; 192 } 193 End = Start.getLocWithOffset(Lengths[i]); 194 } else { 195 End = Sources.getLocForEndOfFile(ID); 196 } 197 unsigned Offset = Sources.getFileOffset(Start); 198 unsigned Length = Sources.getFileOffset(End) - Offset; 199 Ranges.push_back(tooling::Range(Offset, Length)); 200 } 201 return false; 202 } 203 204 static void outputReplacementXML(StringRef Text) { 205 // FIXME: When we sort includes, we need to make sure the stream is correct 206 // utf-8. 207 size_t From = 0; 208 size_t Index; 209 while ((Index = Text.find_first_of("\n\r<&", From)) != StringRef::npos) { 210 outs() << Text.substr(From, Index - From); 211 switch (Text[Index]) { 212 case '\n': 213 outs() << " "; 214 break; 215 case '\r': 216 outs() << " "; 217 break; 218 case '<': 219 outs() << "<"; 220 break; 221 case '&': 222 outs() << "&"; 223 break; 224 default: 225 llvm_unreachable("Unexpected character encountered!"); 226 } 227 From = Index + 1; 228 } 229 outs() << Text.substr(From); 230 } 231 232 static void outputReplacementsXML(const Replacements &Replaces) { 233 for (const auto &R : Replaces) { 234 outs() << "<replacement " 235 << "offset='" << R.getOffset() << "' " 236 << "length='" << R.getLength() << "'>"; 237 outputReplacementXML(R.getReplacementText()); 238 outs() << "</replacement>\n"; 239 } 240 } 241 242 // Returns true on error. 243 static bool format(StringRef FileName) { 244 if (!OutputXML && Inplace && FileName == "-") { 245 errs() << "error: cannot use -i when reading from stdin.\n"; 246 return false; 247 } 248 // On Windows, overwriting a file with an open file mapping doesn't work, 249 // so read the whole file into memory when formatting in-place. 250 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr = 251 !OutputXML && Inplace ? MemoryBuffer::getFileAsStream(FileName) : 252 MemoryBuffer::getFileOrSTDIN(FileName); 253 if (std::error_code EC = CodeOrErr.getError()) { 254 errs() << EC.message() << "\n"; 255 return true; 256 } 257 std::unique_ptr<llvm::MemoryBuffer> Code = std::move(CodeOrErr.get()); 258 if (Code->getBufferSize() == 0) 259 return false; // Empty files are formatted correctly. 260 261 // Check to see if the buffer has a UTF Byte Order Mark (BOM). 262 // We only support UTF-8 with and without a BOM right now. See 263 // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding 264 // for more information. 265 StringRef BufStr = Code->getBuffer(); 266 const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr) 267 .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), 268 "UTF-32 (BE)") 269 .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), 270 "UTF-32 (LE)") 271 .StartsWith("\xFE\xFF", "UTF-16 (BE)") 272 .StartsWith("\xFF\xFE", "UTF-16 (LE)") 273 .StartsWith("\x2B\x2F\x76", "UTF-7") 274 .StartsWith("\xF7\x64\x4C", "UTF-1") 275 .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") 276 .StartsWith("\x0E\xFE\xFF", "SCSU") 277 .StartsWith("\xFB\xEE\x28", "BOCU-1") 278 .StartsWith("\x84\x31\x95\x33", "GB-18030") 279 .Default(nullptr); 280 281 if (InvalidBOM) { 282 errs() << "error: encoding with unsupported byte order mark \"" 283 << InvalidBOM << "\" detected"; 284 if (FileName != "-") 285 errs() << " in file '" << FileName << "'"; 286 errs() << ".\n"; 287 return true; 288 } 289 290 std::vector<tooling::Range> Ranges; 291 if (fillRanges(Code.get(), Ranges)) 292 return true; 293 StringRef AssumedFileName = (FileName == "-") ? AssumeFileName : FileName; 294 295 llvm::Expected<FormatStyle> FormatStyle = 296 getStyle(Style, AssumedFileName, FallbackStyle, Code->getBuffer()); 297 if (!FormatStyle) { 298 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n"; 299 return true; 300 } 301 302 if (SortIncludes.getNumOccurrences() != 0) 303 FormatStyle->SortIncludes = SortIncludes; 304 unsigned CursorPosition = Cursor; 305 Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges, 306 AssumedFileName, &CursorPosition); 307 auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces); 308 if (!ChangedCode) { 309 llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n"; 310 return true; 311 } 312 // Get new affected ranges after sorting `#includes`. 313 Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges); 314 FormattingAttemptStatus Status; 315 Replacements FormatChanges = reformat(*FormatStyle, *ChangedCode, Ranges, 316 AssumedFileName, &Status); 317 Replaces = Replaces.merge(FormatChanges); 318 if (OutputXML) { 319 outs() << "<?xml version='1.0'?>\n<replacements " 320 "xml:space='preserve' incomplete_format='" 321 << (Status.FormatComplete ? "false" : "true") << "'"; 322 if (!Status.FormatComplete) 323 outs() << " line='" << Status.Line << "'"; 324 outs() << ">\n"; 325 if (Cursor.getNumOccurrences() != 0) 326 outs() << "<cursor>" 327 << FormatChanges.getShiftedCodePosition(CursorPosition) 328 << "</cursor>\n"; 329 330 outputReplacementsXML(Replaces); 331 outs() << "</replacements>\n"; 332 } else { 333 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem( 334 new llvm::vfs::InMemoryFileSystem); 335 FileManager Files(FileSystemOptions(), InMemoryFileSystem); 336 DiagnosticsEngine Diagnostics( 337 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 338 new DiagnosticOptions); 339 SourceManager Sources(Diagnostics, Files); 340 FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files, 341 InMemoryFileSystem.get()); 342 Rewriter Rewrite(Sources, LangOptions()); 343 tooling::applyAllReplacements(Replaces, Rewrite); 344 if (Inplace) { 345 if (Rewrite.overwriteChangedFiles()) 346 return true; 347 } else { 348 if (Cursor.getNumOccurrences() != 0) { 349 outs() << "{ \"Cursor\": " 350 << FormatChanges.getShiftedCodePosition(CursorPosition) 351 << ", \"IncompleteFormat\": " 352 << (Status.FormatComplete ? "false" : "true"); 353 if (!Status.FormatComplete) 354 outs() << ", \"Line\": " << Status.Line; 355 outs() << " }\n"; 356 } 357 Rewrite.getEditBuffer(ID).write(outs()); 358 } 359 } 360 return false; 361 } 362 363 } // namespace format 364 } // namespace clang 365 366 static void PrintVersion(raw_ostream &OS) { 367 OS << clang::getClangToolFullVersion("clang-format") << '\n'; 368 } 369 370 int main(int argc, const char **argv) { 371 llvm::InitLLVM X(argc, argv); 372 373 cl::HideUnrelatedOptions(ClangFormatCategory); 374 375 cl::SetVersionPrinter(PrintVersion); 376 cl::ParseCommandLineOptions( 377 argc, argv, 378 "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n" 379 "If no arguments are specified, it formats the code from standard input\n" 380 "and writes the result to the standard output.\n" 381 "If <file>s are given, it reformats the files. If -i is specified\n" 382 "together with <file>s, the files are edited in-place. Otherwise, the\n" 383 "result is written to the standard output.\n"); 384 385 if (Help) { 386 cl::PrintHelpMessage(); 387 return 0; 388 } 389 390 if (DumpConfig) { 391 StringRef FileName; 392 std::unique_ptr<llvm::MemoryBuffer> Code; 393 if (FileNames.empty()) { 394 // We can't read the code to detect the language if there's no 395 // file name, so leave Code empty here. 396 FileName = AssumeFileName; 397 } else { 398 // Read in the code in case the filename alone isn't enough to 399 // detect the language. 400 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr = 401 MemoryBuffer::getFileOrSTDIN(FileNames[0]); 402 if (std::error_code EC = CodeOrErr.getError()) { 403 llvm::errs() << EC.message() << "\n"; 404 return 1; 405 } 406 FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0]; 407 Code = std::move(CodeOrErr.get()); 408 } 409 llvm::Expected<clang::format::FormatStyle> FormatStyle = 410 clang::format::getStyle(Style, FileName, FallbackStyle, 411 Code ? Code->getBuffer() : ""); 412 if (!FormatStyle) { 413 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n"; 414 return 1; 415 } 416 std::string Config = clang::format::configurationAsText(*FormatStyle); 417 outs() << Config << "\n"; 418 return 0; 419 } 420 421 bool Error = false; 422 if (FileNames.empty()) { 423 Error = clang::format::format("-"); 424 return Error ? 1 : 0; 425 } 426 if (FileNames.size() != 1 && (!Offsets.empty() || !Lengths.empty() || !LineRanges.empty())) { 427 errs() << "error: -offset, -length and -lines can only be used for " 428 "single file.\n"; 429 return 1; 430 } 431 for (const auto &FileName : FileNames) { 432 if (Verbose) 433 errs() << "Formatting " << FileName << "\n"; 434 Error |= clang::format::format(FileName); 435 } 436 return Error ? 1 : 0; 437 } 438