1 //===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This program finds similar sections of a Module, and exports them as a JSON 10 // file. 11 // 12 // To find similarities contained across multiple modules, please use llvm-link 13 // first to merge the modules. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Analysis/IRSimilarityIdentifier.h" 18 #include "llvm/IRReader/IRReader.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/FileSystem.h" 21 #include "llvm/Support/InitLLVM.h" 22 #include "llvm/Support/JSON.h" 23 #include "llvm/Support/SourceMgr.h" 24 #include "llvm/Support/ToolOutputFile.h" 25 26 using namespace llvm; 27 using namespace IRSimilarity; 28 29 static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"), 30 cl::init("-"), 31 cl::value_desc("filename")); 32 33 static cl::opt<std::string> InputSourceFile(cl::Positional, 34 cl::desc("<Source file>"), 35 cl::init("-"), 36 cl::value_desc("filename")); 37 38 /// Retrieve the unique number \p I was mapped to in parseBitcodeFile. 39 /// 40 /// \param I - The Instruction to find the instruction number for. 41 /// \param LLVMInstNum - The mapping of Instructions to their location in the 42 /// module represented by an unsigned integer. 43 /// \returns The instruction number for \p I if it exists. 44 std::optional<unsigned> 45 getPositionInModule(const Instruction *I, 46 const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 47 assert(I && "Instruction is nullptr!"); 48 DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I); 49 if (It == LLVMInstNum.end()) 50 return std::nullopt; 51 return It->second; 52 } 53 54 /// Exports the given SimilarityGroups to a JSON file at \p FilePath. 55 /// 56 /// \param FilePath - The path to the output location. 57 /// \param SimSections - The similarity groups to process. 58 /// \param LLVMInstNum - The mapping of Instructions to their location in the 59 /// module represented by an unsigned integer. 60 /// \returns A nonzero error code if there was a failure creating the file. 61 std::error_code 62 exportToFile(const StringRef FilePath, 63 const SimilarityGroupList &SimSections, 64 const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 65 std::error_code EC; 66 std::unique_ptr<ToolOutputFile> Out( 67 new ToolOutputFile(FilePath, EC, sys::fs::OF_None)); 68 if (EC) 69 return EC; 70 71 json::OStream J(Out->os(), 1); 72 J.objectBegin(); 73 74 unsigned SimOption = 1; 75 // Process each list of SimilarityGroups organized by the Module. 76 for (const SimilarityGroup &G : SimSections) { 77 std::string SimOptionStr = std::to_string(SimOption); 78 J.attributeBegin(SimOptionStr); 79 J.arrayBegin(); 80 // For each file there is a list of the range where the similarity 81 // exists. 82 for (const IRSimilarityCandidate &C : G) { 83 std::optional<unsigned> Start = 84 getPositionInModule((*C.front()).Inst, LLVMInstNum); 85 std::optional<unsigned> End = 86 getPositionInModule((*C.back()).Inst, LLVMInstNum); 87 88 assert(Start && 89 "Could not find instruction number for first instruction"); 90 assert(End && "Could not find instruction number for last instruction"); 91 92 J.object([&] { 93 J.attribute("start", *Start); 94 J.attribute("end", *End); 95 }); 96 } 97 J.arrayEnd(); 98 J.attributeEnd(); 99 SimOption++; 100 } 101 J.objectEnd(); 102 103 Out->keep(); 104 105 return EC; 106 } 107 108 int main(int argc, const char *argv[]) { 109 InitLLVM X(argc, argv); 110 111 cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n"); 112 113 LLVMContext CurrContext; 114 SMDiagnostic Err; 115 std::unique_ptr<Module> ModuleToAnalyze = 116 parseIRFile(InputSourceFile, Err, CurrContext); 117 118 if (!ModuleToAnalyze) { 119 Err.print(argv[0], errs()); 120 return 1; 121 } 122 123 // Mapping from an Instruction pointer to its occurrence in a sequential 124 // list of all the Instructions in a Module. 125 DenseMap<Instruction *, unsigned> LLVMInstNum; 126 127 // We give each instruction a number, which gives us a start and end value 128 // for the beginning and end of each IRSimilarityCandidate. 129 unsigned InstructionNumber = 1; 130 for (Function &F : *ModuleToAnalyze) 131 for (BasicBlock &BB : F) 132 for (Instruction &I : BB.instructionsWithoutDebug()) 133 LLVMInstNum[&I]= InstructionNumber++; 134 135 // The similarity identifier we will use to find the similar sections. 136 IRSimilarityIdentifier SimIdent; 137 SimilarityGroupList SimilaritySections = 138 SimIdent.findSimilarity(*ModuleToAnalyze); 139 140 std::error_code E = 141 exportToFile(OutputFilename, SimilaritySections, LLVMInstNum); 142 if (E) { 143 errs() << argv[0] << ": " << E.message() << '\n'; 144 return 2; 145 } 146 147 return 0; 148 } 149