//===-- clang-format/ClangFormat.cpp - Clang format tool ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a clang-format tool that automatically formats
/// (fragments of) C++ code.
///
//===----------------------------------------------------------------------===//

#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h"
#include "clang/Format/Format.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Process.h"

using namespace llvm;
using clang::tooling::Replacements;

// Hidden short spelling of -help. main() tests this flag after the command
// line has been parsed and, when set, prints the help message and exits.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);

// Mark all our options with this category, everything else (except for -version
// and -help) will be hidden.
35 static cl::OptionCategory ClangFormatCategory("Clang-format options"); 36 37 static cl::list<unsigned> 38 Offsets("offset", 39 cl::desc("Format a range starting at this byte offset.\n" 40 "Multiple ranges can be formatted by specifying\n" 41 "several -offset and -length pairs.\n" 42 "Can only be used with one input file."), 43 cl::cat(ClangFormatCategory)); 44 static cl::list<unsigned> 45 Lengths("length", 46 cl::desc("Format a range of this length (in bytes).\n" 47 "Multiple ranges can be formatted by specifying\n" 48 "several -offset and -length pairs.\n" 49 "When only a single -offset is specified without\n" 50 "-length, clang-format will format up to the end\n" 51 "of the file.\n" 52 "Can only be used with one input file."), 53 cl::cat(ClangFormatCategory)); 54 static cl::list<std::string> 55 LineRanges("lines", 56 cl::desc("<start line>:<end line> - format a range of\n" 57 "lines (both 1-based).\n" 58 "Multiple ranges can be formatted by specifying\n" 59 "several -lines arguments.\n" 60 "Can't be used with -offset and -length.\n" 61 "Can only be used with one input file."), 62 cl::cat(ClangFormatCategory)); 63 static cl::opt<std::string> 64 Style("style", cl::desc(clang::format::StyleOptionHelpDescription), 65 cl::init(clang::format::DefaultFormatStyle), 66 cl::cat(ClangFormatCategory)); 67 static cl::opt<std::string> 68 FallbackStyle("fallback-style", 69 cl::desc("The name of the predefined style used as a\n" 70 "fallback in case clang-format is invoked with\n" 71 "-style=file, but can not find the .clang-format\n" 72 "file to use.\n" 73 "Use -fallback-style=none to skip formatting."), 74 cl::init(clang::format::DefaultFallbackStyle), 75 cl::cat(ClangFormatCategory)); 76 77 static cl::opt<std::string> AssumeFileName( 78 "assume-filename", 79 cl::desc("When reading from stdin, clang-format assumes this\n" 80 "filename to look for a style config file (with\n" 81 "-style=file) and to determine the language."), 82 cl::init("<stdin>"), 
cl::cat(ClangFormatCategory)); 83 84 static cl::opt<bool> Inplace("i", 85 cl::desc("Inplace edit <file>s, if specified."), 86 cl::cat(ClangFormatCategory)); 87 88 static cl::opt<bool> OutputXML("output-replacements-xml", 89 cl::desc("Output replacements as XML."), 90 cl::cat(ClangFormatCategory)); 91 static cl::opt<bool> 92 DumpConfig("dump-config", 93 cl::desc("Dump configuration options to stdout and exit.\n" 94 "Can be used with -style option."), 95 cl::cat(ClangFormatCategory)); 96 static cl::opt<unsigned> 97 Cursor("cursor", 98 cl::desc("The position of the cursor when invoking\n" 99 "clang-format from an editor integration"), 100 cl::init(0), cl::cat(ClangFormatCategory)); 101 102 static cl::opt<bool> SortIncludes( 103 "sort-includes", 104 cl::desc("If set, overrides the include sorting behavior determined by the " 105 "SortIncludes style flag"), 106 cl::cat(ClangFormatCategory)); 107 108 static cl::opt<bool> 109 Verbose("verbose", cl::desc("If set, shows the list of processed files"), 110 cl::cat(ClangFormatCategory)); 111 112 // Use --dry-run to match other LLVM tools when you mean do it but don't 113 // actually do it 114 static cl::opt<bool> 115 DryRun("dry-run", 116 cl::desc("If set, do not actually make the formatting changes"), 117 cl::cat(ClangFormatCategory)); 118 119 // Use -n as a common command as an alias for --dry-run. (git and make use -n) 120 static cl::alias DryRunShort("n", cl::desc("Alias for --dry-run"), 121 cl::cat(ClangFormatCategory), cl::aliasopt(DryRun), 122 cl::NotHidden); 123 124 // Emulate being able to turn on/off the warning. 125 static cl::opt<bool> 126 WarnFormat("Wclang-format-violations", 127 cl::desc("Warnings about individual formatting changes needed. " 128 "Used only with --dry-run or -n"), 129 cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden); 130 131 static cl::opt<bool> 132 NoWarnFormat("Wno-clang-format-violations", 133 cl::desc("Do not warn about individual formatting changes " 134 "needed. 
Used only with --dry-run or -n"), 135 cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden); 136 137 static cl::opt<unsigned> ErrorLimit( 138 "ferror-limit", 139 cl::desc("Set the maximum number of clang-format errors to emit before " 140 "stopping (0 = no limit). Used only with --dry-run or -n"), 141 cl::init(0), cl::cat(ClangFormatCategory)); 142 143 static cl::opt<bool> 144 WarningsAsErrors("Werror", 145 cl::desc("If set, changes formatting warnings to errors"), 146 cl::cat(ClangFormatCategory)); 147 148 static cl::opt<bool> 149 ShowColors("fcolor-diagnostics", 150 cl::desc("If set, and on a color-capable terminal controls " 151 "whether or not to print diagnostics in color"), 152 cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden); 153 154 static cl::opt<bool> 155 NoShowColors("fno-color-diagnostics", 156 cl::desc("If set, and on a color-capable terminal controls " 157 "whether or not to print diagnostics in color"), 158 cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden); 159 160 static cl::list<std::string> FileNames(cl::Positional, cl::desc("[<file> ...]"), 161 cl::cat(ClangFormatCategory)); 162 163 namespace clang { 164 namespace format { 165 166 static FileID createInMemoryFile(StringRef FileName, MemoryBuffer *Source, 167 SourceManager &Sources, FileManager &Files, 168 llvm::vfs::InMemoryFileSystem *MemFS) { 169 MemFS->addFileNoOwn(FileName, 0, Source); 170 auto File = Files.getFile(FileName); 171 return Sources.createFileID(File ? *File : nullptr, SourceLocation(), 172 SrcMgr::C_User); 173 } 174 175 // Parses <start line>:<end line> input to a pair of line numbers. 176 // Returns true on error. 
177 static bool parseLineRange(StringRef Input, unsigned &FromLine, 178 unsigned &ToLine) { 179 std::pair<StringRef, StringRef> LineRange = Input.split(':'); 180 return LineRange.first.getAsInteger(0, FromLine) || 181 LineRange.second.getAsInteger(0, ToLine); 182 } 183 184 static bool fillRanges(MemoryBuffer *Code, 185 std::vector<tooling::Range> &Ranges) { 186 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem( 187 new llvm::vfs::InMemoryFileSystem); 188 FileManager Files(FileSystemOptions(), InMemoryFileSystem); 189 DiagnosticsEngine Diagnostics( 190 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 191 new DiagnosticOptions); 192 SourceManager Sources(Diagnostics, Files); 193 FileID ID = createInMemoryFile("<irrelevant>", Code, Sources, Files, 194 InMemoryFileSystem.get()); 195 if (!LineRanges.empty()) { 196 if (!Offsets.empty() || !Lengths.empty()) { 197 errs() << "error: cannot use -lines with -offset/-length\n"; 198 return true; 199 } 200 201 for (unsigned i = 0, e = LineRanges.size(); i < e; ++i) { 202 unsigned FromLine, ToLine; 203 if (parseLineRange(LineRanges[i], FromLine, ToLine)) { 204 errs() << "error: invalid <start line>:<end line> pair\n"; 205 return true; 206 } 207 if (FromLine > ToLine) { 208 errs() << "error: start line should be less than end line\n"; 209 return true; 210 } 211 SourceLocation Start = Sources.translateLineCol(ID, FromLine, 1); 212 SourceLocation End = Sources.translateLineCol(ID, ToLine, UINT_MAX); 213 if (Start.isInvalid() || End.isInvalid()) 214 return true; 215 unsigned Offset = Sources.getFileOffset(Start); 216 unsigned Length = Sources.getFileOffset(End) - Offset; 217 Ranges.push_back(tooling::Range(Offset, Length)); 218 } 219 return false; 220 } 221 222 if (Offsets.empty()) 223 Offsets.push_back(0); 224 if (Offsets.size() != Lengths.size() && 225 !(Offsets.size() == 1 && Lengths.empty())) { 226 errs() << "error: number of -offset and -length arguments must match.\n"; 227 return true; 228 } 229 for 
(unsigned i = 0, e = Offsets.size(); i != e; ++i) { 230 if (Offsets[i] >= Code->getBufferSize()) { 231 errs() << "error: offset " << Offsets[i] << " is outside the file\n"; 232 return true; 233 } 234 SourceLocation Start = 235 Sources.getLocForStartOfFile(ID).getLocWithOffset(Offsets[i]); 236 SourceLocation End; 237 if (i < Lengths.size()) { 238 if (Offsets[i] + Lengths[i] > Code->getBufferSize()) { 239 errs() << "error: invalid length " << Lengths[i] 240 << ", offset + length (" << Offsets[i] + Lengths[i] 241 << ") is outside the file.\n"; 242 return true; 243 } 244 End = Start.getLocWithOffset(Lengths[i]); 245 } else { 246 End = Sources.getLocForEndOfFile(ID); 247 } 248 unsigned Offset = Sources.getFileOffset(Start); 249 unsigned Length = Sources.getFileOffset(End) - Offset; 250 Ranges.push_back(tooling::Range(Offset, Length)); 251 } 252 return false; 253 } 254 255 static void outputReplacementXML(StringRef Text) { 256 // FIXME: When we sort includes, we need to make sure the stream is correct 257 // utf-8. 258 size_t From = 0; 259 size_t Index; 260 while ((Index = Text.find_first_of("\n\r<&", From)) != StringRef::npos) { 261 outs() << Text.substr(From, Index - From); 262 switch (Text[Index]) { 263 case '\n': 264 outs() << " "; 265 break; 266 case '\r': 267 outs() << " "; 268 break; 269 case '<': 270 outs() << "<"; 271 break; 272 case '&': 273 outs() << "&"; 274 break; 275 default: 276 llvm_unreachable("Unexpected character encountered!"); 277 } 278 From = Index + 1; 279 } 280 outs() << Text.substr(From); 281 } 282 283 static void outputReplacementsXML(const Replacements &Replaces) { 284 for (const auto &R : Replaces) { 285 outs() << "<replacement " 286 << "offset='" << R.getOffset() << "' " 287 << "length='" << R.getLength() << "'>"; 288 outputReplacementXML(R.getReplacementText()); 289 outs() << "</replacement>\n"; 290 } 291 } 292 293 // If BufStr has an invalid BOM, returns the BOM name; otherwise, returns 294 // nullptr. 
295 static const char *getInValidBOM(StringRef BufStr) { 296 // Check to see if the buffer has a UTF Byte Order Mark (BOM). 297 // We only support UTF-8 with and without a BOM right now. See 298 // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding 299 // for more information. 300 const char *InvalidBOM = 301 llvm::StringSwitch<const char *>(BufStr) 302 .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), 303 "UTF-32 (BE)") 304 .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), 305 "UTF-32 (LE)") 306 .StartsWith("\xFE\xFF", "UTF-16 (BE)") 307 .StartsWith("\xFF\xFE", "UTF-16 (LE)") 308 .StartsWith("\x2B\x2F\x76", "UTF-7") 309 .StartsWith("\xF7\x64\x4C", "UTF-1") 310 .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") 311 .StartsWith("\x0E\xFE\xFF", "SCSU") 312 .StartsWith("\xFB\xEE\x28", "BOCU-1") 313 .StartsWith("\x84\x31\x95\x33", "GB-18030") 314 .Default(nullptr); 315 return InvalidBOM; 316 } 317 318 static bool 319 emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName, 320 const std::unique_ptr<llvm::MemoryBuffer> &Code) { 321 if (Replaces.empty()) { 322 return false; 323 } 324 325 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 326 DiagOpts->ShowColors = (ShowColors && !NoShowColors); 327 328 TextDiagnosticPrinter *DiagsBuffer = 329 new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts, false); 330 331 IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs()); 332 IntrusiveRefCntPtr<DiagnosticsEngine> Diags( 333 new DiagnosticsEngine(DiagID, &*DiagOpts, DiagsBuffer)); 334 335 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem( 336 new llvm::vfs::InMemoryFileSystem); 337 FileManager Files(FileSystemOptions(), InMemoryFileSystem); 338 SourceManager Sources(*Diags, Files); 339 FileID FileID = createInMemoryFile(AssumedFileName, Code.get(), Sources, 340 Files, InMemoryFileSystem.get()); 341 342 const unsigned ID = Diags->getCustomDiagID( 343 
WarningsAsErrors ? clang::DiagnosticsEngine::Error 344 : clang::DiagnosticsEngine::Warning, 345 "code should be clang-formatted [-Wclang-format-violations]"); 346 347 unsigned Errors = 0; 348 DiagsBuffer->BeginSourceFile(LangOptions(), nullptr); 349 if (WarnFormat && !NoWarnFormat) { 350 for (const auto &R : Replaces) { 351 Diags->Report( 352 Sources.getLocForStartOfFile(FileID).getLocWithOffset(R.getOffset()), 353 ID); 354 Errors++; 355 if (ErrorLimit && Errors >= ErrorLimit) 356 break; 357 } 358 } 359 DiagsBuffer->EndSourceFile(); 360 return WarningsAsErrors; 361 } 362 363 static void outputXML(const Replacements &Replaces, 364 const Replacements &FormatChanges, 365 const FormattingAttemptStatus &Status, 366 const cl::opt<unsigned> &Cursor, 367 unsigned CursorPosition) { 368 outs() << "<?xml version='1.0'?>\n<replacements " 369 "xml:space='preserve' incomplete_format='" 370 << (Status.FormatComplete ? "false" : "true") << "'"; 371 if (!Status.FormatComplete) 372 outs() << " line='" << Status.Line << "'"; 373 outs() << ">\n"; 374 if (Cursor.getNumOccurrences() != 0) 375 outs() << "<cursor>" << FormatChanges.getShiftedCodePosition(CursorPosition) 376 << "</cursor>\n"; 377 378 outputReplacementsXML(Replaces); 379 outs() << "</replacements>\n"; 380 } 381 382 // Returns true on error. 383 static bool format(StringRef FileName) { 384 if (!OutputXML && Inplace && FileName == "-") { 385 errs() << "error: cannot use -i when reading from stdin.\n"; 386 return false; 387 } 388 // On Windows, overwriting a file with an open file mapping doesn't work, 389 // so read the whole file into memory when formatting in-place. 390 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr = 391 !OutputXML && Inplace ? 
MemoryBuffer::getFileAsStream(FileName) 392 : MemoryBuffer::getFileOrSTDIN(FileName); 393 if (std::error_code EC = CodeOrErr.getError()) { 394 errs() << EC.message() << "\n"; 395 return true; 396 } 397 std::unique_ptr<llvm::MemoryBuffer> Code = std::move(CodeOrErr.get()); 398 if (Code->getBufferSize() == 0) 399 return false; // Empty files are formatted correctly. 400 401 StringRef BufStr = Code->getBuffer(); 402 403 const char *InvalidBOM = getInValidBOM(BufStr); 404 405 if (InvalidBOM) { 406 errs() << "error: encoding with unsupported byte order mark \"" 407 << InvalidBOM << "\" detected"; 408 if (FileName != "-") 409 errs() << " in file '" << FileName << "'"; 410 errs() << ".\n"; 411 return true; 412 } 413 414 std::vector<tooling::Range> Ranges; 415 if (fillRanges(Code.get(), Ranges)) 416 return true; 417 StringRef AssumedFileName = (FileName == "-") ? AssumeFileName : FileName; 418 419 llvm::Expected<FormatStyle> FormatStyle = 420 getStyle(Style, AssumedFileName, FallbackStyle, Code->getBuffer()); 421 if (!FormatStyle) { 422 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n"; 423 return true; 424 } 425 426 if (SortIncludes.getNumOccurrences() != 0) 427 FormatStyle->SortIncludes = SortIncludes; 428 unsigned CursorPosition = Cursor; 429 Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges, 430 AssumedFileName, &CursorPosition); 431 auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces); 432 if (!ChangedCode) { 433 llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n"; 434 return true; 435 } 436 // Get new affected ranges after sorting `#includes`. 
437 Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges); 438 FormattingAttemptStatus Status; 439 Replacements FormatChanges = 440 reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status); 441 Replaces = Replaces.merge(FormatChanges); 442 if (OutputXML || DryRun) { 443 if (DryRun) { 444 return emitReplacementWarnings(Replaces, AssumedFileName, Code); 445 } else { 446 outputXML(Replaces, FormatChanges, Status, Cursor, CursorPosition); 447 } 448 } else { 449 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem( 450 new llvm::vfs::InMemoryFileSystem); 451 FileManager Files(FileSystemOptions(), InMemoryFileSystem); 452 DiagnosticsEngine Diagnostics( 453 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 454 new DiagnosticOptions); 455 SourceManager Sources(Diagnostics, Files); 456 FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files, 457 InMemoryFileSystem.get()); 458 Rewriter Rewrite(Sources, LangOptions()); 459 tooling::applyAllReplacements(Replaces, Rewrite); 460 if (Inplace) { 461 if (Rewrite.overwriteChangedFiles()) 462 return true; 463 } else { 464 if (Cursor.getNumOccurrences() != 0) { 465 outs() << "{ \"Cursor\": " 466 << FormatChanges.getShiftedCodePosition(CursorPosition) 467 << ", \"IncompleteFormat\": " 468 << (Status.FormatComplete ? "false" : "true"); 469 if (!Status.FormatComplete) 470 outs() << ", \"Line\": " << Status.Line; 471 outs() << " }\n"; 472 } 473 Rewrite.getEditBuffer(ID).write(outs()); 474 } 475 } 476 return false; 477 } 478 479 } // namespace format 480 } // namespace clang 481 482 static void PrintVersion(raw_ostream &OS) { 483 OS << clang::getClangToolFullVersion("clang-format") << '\n'; 484 } 485 486 // Dump the configuration. 487 static int dumpConfig() { 488 StringRef FileName; 489 std::unique_ptr<llvm::MemoryBuffer> Code; 490 if (FileNames.empty()) { 491 // We can't read the code to detect the language if there's no 492 // file name, so leave Code empty here. 
493 FileName = AssumeFileName; 494 } else { 495 // Read in the code in case the filename alone isn't enough to 496 // detect the language. 497 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr = 498 MemoryBuffer::getFileOrSTDIN(FileNames[0]); 499 if (std::error_code EC = CodeOrErr.getError()) { 500 llvm::errs() << EC.message() << "\n"; 501 return 1; 502 } 503 FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0]; 504 Code = std::move(CodeOrErr.get()); 505 } 506 llvm::Expected<clang::format::FormatStyle> FormatStyle = 507 clang::format::getStyle(Style, FileName, FallbackStyle, 508 Code ? Code->getBuffer() : ""); 509 if (!FormatStyle) { 510 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n"; 511 return 1; 512 } 513 std::string Config = clang::format::configurationAsText(*FormatStyle); 514 outs() << Config << "\n"; 515 return 0; 516 } 517 518 int main(int argc, const char **argv) { 519 llvm::InitLLVM X(argc, argv); 520 521 cl::HideUnrelatedOptions(ClangFormatCategory); 522 523 cl::SetVersionPrinter(PrintVersion); 524 cl::ParseCommandLineOptions( 525 argc, argv, 526 "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n" 527 "If no arguments are specified, it formats the code from standard input\n" 528 "and writes the result to the standard output.\n" 529 "If <file>s are given, it reformats the files. If -i is specified\n" 530 "together with <file>s, the files are edited in-place. Otherwise, the\n" 531 "result is written to the standard output.\n"); 532 533 if (Help) { 534 cl::PrintHelpMessage(); 535 return 0; 536 } 537 538 if (DumpConfig) { 539 return dumpConfig(); 540 } 541 542 bool Error = false; 543 if (FileNames.empty()) { 544 Error = clang::format::format("-"); 545 return Error ? 
1 : 0; 546 } 547 if (FileNames.size() != 1 && 548 (!Offsets.empty() || !Lengths.empty() || !LineRanges.empty())) { 549 errs() << "error: -offset, -length and -lines can only be used for " 550 "single file.\n"; 551 return 1; 552 } 553 for (const auto &FileName : FileNames) { 554 if (Verbose) 555 errs() << "Formatting " << FileName << "\n"; 556 Error |= clang::format::format(FileName); 557 } 558 return Error ? 1 : 0; 559 } 560