xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-sim/llvm-sim.cpp (revision a7beca6fb113986839de73b7cf73d933464898c6)
1  //===-- llvm-sim.cpp - Find  similar sections of programs -------*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This program finds similar sections of a Module, and exports them as a JSON
10  // file.
11  //
12  // To find similarities contained across multiple modules, please use llvm-link
13  // first to merge the modules.
14  //
15  //===----------------------------------------------------------------------===//
16  
17  #include "llvm/Analysis/IRSimilarityIdentifier.h"
18  #include "llvm/IRReader/IRReader.h"
19  #include "llvm/Support/CommandLine.h"
20  #include "llvm/Support/FileSystem.h"
21  #include "llvm/Support/InitLLVM.h"
22  #include "llvm/Support/JSON.h"
23  #include "llvm/Support/SourceMgr.h"
24  #include "llvm/Support/ToolOutputFile.h"
25  
26  using namespace llvm;
27  using namespace IRSimilarity;
28  
29  static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
30                                             cl::init("-"),
31                                             cl::value_desc("filename"));
32  
33  static cl::opt<std::string> InputSourceFile(cl::Positional,
34                                              cl::desc("<Source file>"),
35                                              cl::init("-"),
36                                              cl::value_desc("filename"));
37  
38  /// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
39  ///
40  /// \param I - The Instruction to find the instruction number for.
41  /// \param LLVMInstNum - The mapping of Instructions to their location in the
42  /// module represented by an unsigned integer.
43  /// \returns The instruction number for \p I if it exists.
44  std::optional<unsigned>
45  getPositionInModule(const Instruction *I,
46                      const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
47    assert(I && "Instruction is nullptr!");
48    DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
49    if (It == LLVMInstNum.end())
50      return std::nullopt;
51    return It->second;
52  }
53  
54  /// Exports the given SimilarityGroups to a JSON file at \p FilePath.
55  ///
56  /// \param FilePath - The path to the output location.
57  /// \param SimSections - The similarity groups to process.
58  /// \param LLVMInstNum - The mapping of Instructions to their location in the
59  /// module represented by an unsigned integer.
60  /// \returns A nonzero error code if there was a failure creating the file.
61  std::error_code
62  exportToFile(const StringRef FilePath,
63               const SimilarityGroupList &SimSections,
64               const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
65    std::error_code EC;
66    std::unique_ptr<ToolOutputFile> Out(
67        new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
68    if (EC)
69      return EC;
70  
71    json::OStream J(Out->os(), 1);
72    J.objectBegin();
73  
74    unsigned SimOption = 1;
75    // Process each list of SimilarityGroups organized by the Module.
76    for (const SimilarityGroup &G : SimSections) {
77      std::string SimOptionStr = std::to_string(SimOption);
78      J.attributeBegin(SimOptionStr);
79      J.arrayBegin();
80      // For each file there is a list of the range where the similarity
81      // exists.
82      for (const IRSimilarityCandidate &C : G) {
83        std::optional<unsigned> Start =
84            getPositionInModule((*C.front()).Inst, LLVMInstNum);
85        std::optional<unsigned> End =
86            getPositionInModule((*C.back()).Inst, LLVMInstNum);
87  
88        assert(Start &&
89               "Could not find instruction number for first instruction");
90        assert(End && "Could not find instruction number for last instruction");
91  
92        J.object([&] {
93          J.attribute("start", *Start);
94          J.attribute("end", *End);
95        });
96      }
97      J.arrayEnd();
98      J.attributeEnd();
99      SimOption++;
100    }
101    J.objectEnd();
102  
103    Out->keep();
104  
105    return EC;
106  }
107  
108  int main(int argc, const char *argv[]) {
109    InitLLVM X(argc, argv);
110  
111    cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
112  
113    LLVMContext CurrContext;
114    SMDiagnostic Err;
115    std::unique_ptr<Module> ModuleToAnalyze =
116        parseIRFile(InputSourceFile, Err, CurrContext);
117  
118    if (!ModuleToAnalyze) {
119      Err.print(argv[0], errs());
120      return 1;
121    }
122  
123    // Mapping from an Instruction pointer to its occurrence in a sequential
124    // list of all the Instructions in a Module.
125    DenseMap<Instruction *, unsigned> LLVMInstNum;
126  
127    // We give each instruction a number, which gives us a start and end value
128    // for the beginning and end of each IRSimilarityCandidate.
129    unsigned InstructionNumber = 1;
130    for (Function &F : *ModuleToAnalyze)
131      for (BasicBlock &BB : F)
132        for (Instruction &I : BB.instructionsWithoutDebug())
133          LLVMInstNum[&I]= InstructionNumber++;
134  
135    // The similarity identifier we will use to find the similar sections.
136    IRSimilarityIdentifier SimIdent;
137    SimilarityGroupList SimilaritySections =
138        SimIdent.findSimilarity(*ModuleToAnalyze);
139  
140    std::error_code E =
141        exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
142    if (E) {
143      errs() << argv[0] << ": " << E.message() << '\n';
144      return 2;
145    }
146  
147    return 0;
148  }
149