1 //===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This program finds similar sections of a Module, and exports them as a JSON 10 // file. 11 // 12 // To find similarities contained across multiple modules, please use llvm-link 13 // first to merge the modules. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Analysis/IRSimilarityIdentifier.h" 18 #include "llvm/IRReader/IRReader.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/FileSystem.h" 21 #include "llvm/Support/InitLLVM.h" 22 #include "llvm/Support/JSON.h" 23 #include "llvm/Support/SourceMgr.h" 24 #include "llvm/Support/ToolOutputFile.h" 25 26 using namespace llvm; 27 using namespace IRSimilarity; 28 29 static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"), 30 cl::init("-"), 31 cl::value_desc("filename")); 32 33 static cl::opt<std::string> InputSourceFile(cl::Positional, 34 cl::desc("<Source file>"), 35 cl::init("-"), 36 cl::value_desc("filename")); 37 38 /// Retrieve the unique number \p I was mapped to in parseBitcodeFile. 39 /// 40 /// \param I - The Instruction to find the instruction number for. 41 /// \param LLVMInstNum - The mapping of Instructions to their location in the 42 /// module represented by an unsigned integer. 43 /// \returns The instruction number for \p I if it exists. 44 Optional<unsigned> 45 getPositionInModule(const Instruction *I, 46 const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 47 assert(I && "Instruction is nullptr!"); 48 DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I); 49 if (It == LLVMInstNum.end()) 50 return None; 51 return It->second; 52 } 53 54 /// Exports the given SimilarityGroups to a JSON file at \p FilePath. 55 /// 56 /// \param FilePath - The path to the output location. 57 /// \param SimSections - The similarity groups to process. 58 /// \param LLVMInstNum - The mapping of Instructions to their location in the 59 /// module represented by an unsigned integer. 60 /// \returns A nonzero error code if there was a failure creating the file. 61 std::error_code 62 exportToFile(const StringRef FilePath, 63 const SimilarityGroupList &SimSections, 64 const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 65 std::error_code EC; 66 std::unique_ptr<ToolOutputFile> Out( 67 new ToolOutputFile(FilePath, EC, sys::fs::OF_None)); 68 if (EC) 69 return EC; 70 71 json::OStream J(Out->os(), 1); 72 J.objectBegin(); 73 74 unsigned SimOption = 1; 75 // Process each list of SimilarityGroups organized by the Module. 76 for (const SimilarityGroup &G : SimSections) { 77 std::string SimOptionStr = std::to_string(SimOption); 78 J.attributeBegin(SimOptionStr); 79 J.arrayBegin(); 80 // For each file there is a list of the range where the similarity 81 // exists. 82 for (const IRSimilarityCandidate &C : G) { 83 Optional<unsigned> Start = 84 getPositionInModule((*C.front()).Inst, LLVMInstNum); 85 Optional<unsigned> End = 86 getPositionInModule((*C.back()).Inst, LLVMInstNum); 87 88 assert(Start.hasValue() && 89 "Could not find instruction number for first instruction"); 90 assert(End.hasValue() && 91 "Could not find instruction number for last instruction"); 92 93 J.object([&] { 94 J.attribute("start", Start.getValue()); 95 J.attribute("end", End.getValue()); 96 }); 97 } 98 J.arrayEnd(); 99 J.attributeEnd(); 100 SimOption++; 101 } 102 J.objectEnd(); 103 104 Out->keep(); 105 106 return EC; 107 } 108 109 int main(int argc, const char *argv[]) { 110 InitLLVM X(argc, argv); 111 112 cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n"); 113 114 LLVMContext CurrContext; 115 SMDiagnostic Err; 116 std::unique_ptr<Module> ModuleToAnalyze = 117 parseIRFile(InputSourceFile, Err, CurrContext); 118 119 if (!ModuleToAnalyze) { 120 Err.print(argv[0], errs()); 121 return 1; 122 } 123 124 // Mapping from an Instruction pointer to its occurrence in a sequential 125 // list of all the Instructions in a Module. 126 DenseMap<Instruction *, unsigned> LLVMInstNum; 127 128 // We give each instruction a number, which gives us a start and end value 129 // for the beginning and end of each IRSimilarityCandidate. 130 unsigned InstructionNumber = 1; 131 for (Function &F : *ModuleToAnalyze) 132 for (BasicBlock &BB : F) 133 for (Instruction &I : BB.instructionsWithoutDebug()) 134 LLVMInstNum[&I]= InstructionNumber++; 135 136 // The similarity identifier we will use to find the similar sections. 137 IRSimilarityIdentifier SimIdent; 138 SimilarityGroupList SimilaritySections = 139 SimIdent.findSimilarity(*ModuleToAnalyze); 140 141 std::error_code E = 142 exportToFile(OutputFilename, SimilaritySections, LLVMInstNum); 143 if (E) { 144 errs() << argv[0] << ": " << E.message() << '\n'; 145 return 2; 146 } 147 148 return 0; 149 } 150