xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-sim/llvm-sim.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 //===-- llvm-sim.cpp - Find  similar sections of programs -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This program finds similar sections of a Module, and exports them as a JSON
10 // file.
11 //
12 // To find similarities contained across multiple modules, please use llvm-link
13 // first to merge the modules.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Analysis/IRSimilarityIdentifier.h"
18 #include "llvm/IRReader/IRReader.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/InitLLVM.h"
22 #include "llvm/Support/JSON.h"
23 #include "llvm/Support/SourceMgr.h"
24 #include "llvm/Support/ToolOutputFile.h"
25 
26 using namespace llvm;
27 using namespace IRSimilarity;
28 
29 static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
30                                            cl::init("-"),
31                                            cl::value_desc("filename"));
32 
33 static cl::opt<std::string> InputSourceFile(cl::Positional,
34                                             cl::desc("<Source file>"),
35                                             cl::init("-"),
36                                             cl::value_desc("filename"));
37 
38 /// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
39 ///
40 /// \param I - The Instruction to find the instruction number for.
41 /// \param LLVMInstNum - The mapping of Instructions to their location in the
42 /// module represented by an unsigned integer.
43 /// \returns The instruction number for \p I if it exists.
44 std::optional<unsigned>
getPositionInModule(const Instruction * I,const DenseMap<Instruction *,unsigned> & LLVMInstNum)45 getPositionInModule(const Instruction *I,
46                     const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
47   assert(I && "Instruction is nullptr!");
48   DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
49   if (It == LLVMInstNum.end())
50     return std::nullopt;
51   return It->second;
52 }
53 
54 /// Exports the given SimilarityGroups to a JSON file at \p FilePath.
55 ///
56 /// \param FilePath - The path to the output location.
57 /// \param SimSections - The similarity groups to process.
58 /// \param LLVMInstNum - The mapping of Instructions to their location in the
59 /// module represented by an unsigned integer.
60 /// \returns A nonzero error code if there was a failure creating the file.
61 std::error_code
exportToFile(const StringRef FilePath,const SimilarityGroupList & SimSections,const DenseMap<Instruction *,unsigned> & LLVMInstNum)62 exportToFile(const StringRef FilePath,
63              const SimilarityGroupList &SimSections,
64              const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
65   std::error_code EC;
66   std::unique_ptr<ToolOutputFile> Out(
67       new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
68   if (EC)
69     return EC;
70 
71   json::OStream J(Out->os(), 1);
72   J.objectBegin();
73 
74   unsigned SimOption = 1;
75   // Process each list of SimilarityGroups organized by the Module.
76   for (const SimilarityGroup &G : SimSections) {
77     std::string SimOptionStr = std::to_string(SimOption);
78     J.attributeBegin(SimOptionStr);
79     J.arrayBegin();
80     // For each file there is a list of the range where the similarity
81     // exists.
82     for (const IRSimilarityCandidate &C : G) {
83       std::optional<unsigned> Start =
84           getPositionInModule((*C.front()).Inst, LLVMInstNum);
85       std::optional<unsigned> End =
86           getPositionInModule((*C.back()).Inst, LLVMInstNum);
87 
88       assert(Start &&
89              "Could not find instruction number for first instruction");
90       assert(End && "Could not find instruction number for last instruction");
91 
92       J.object([&] {
93         J.attribute("start", *Start);
94         J.attribute("end", *End);
95       });
96     }
97     J.arrayEnd();
98     J.attributeEnd();
99     SimOption++;
100   }
101   J.objectEnd();
102 
103   Out->keep();
104 
105   return EC;
106 }
107 
main(int argc,const char * argv[])108 int main(int argc, const char *argv[]) {
109   InitLLVM X(argc, argv);
110 
111   cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
112 
113   LLVMContext CurrContext;
114   SMDiagnostic Err;
115   std::unique_ptr<Module> ModuleToAnalyze =
116       parseIRFile(InputSourceFile, Err, CurrContext);
117 
118   if (!ModuleToAnalyze) {
119     Err.print(argv[0], errs());
120     return 1;
121   }
122 
123   // Mapping from an Instruction pointer to its occurrence in a sequential
124   // list of all the Instructions in a Module.
125   DenseMap<Instruction *, unsigned> LLVMInstNum;
126 
127   // We give each instruction a number, which gives us a start and end value
128   // for the beginning and end of each IRSimilarityCandidate.
129   unsigned InstructionNumber = 1;
130   for (Function &F : *ModuleToAnalyze)
131     for (BasicBlock &BB : F)
132       for (Instruction &I : BB.instructionsWithoutDebug())
133         LLVMInstNum[&I]= InstructionNumber++;
134 
135   // The similarity identifier we will use to find the similar sections.
136   IRSimilarityIdentifier SimIdent;
137   SimilarityGroupList SimilaritySections =
138       SimIdent.findSimilarity(*ModuleToAnalyze);
139 
140   std::error_code E =
141       exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
142   if (E) {
143     errs() << argv[0] << ": " << E.message() << '\n';
144     return 2;
145   }
146 
147   return 0;
148 }
149