xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/HashRecognize.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- HashRecognize.cpp ----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The HashRecognize analysis recognizes unoptimized polynomial hash functions
10 // with operations over a Galois field of characteristic 2, also called binary
11 // fields, or GF(2^n): this class of hash functions can be optimized using a
12 // lookup-table-driven implementation, or with target-specific instructions.
13 // Examples:
14 //
15 //  1. Cyclic redundancy check (CRC), which is a polynomial division in GF(2).
16 //  2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a
17 //     rolling hash polynomial division in GF(2).
18 //  3. Rijndael MixColumns, a step in AES computation, which is a polynomial
19 //     multiplication in GF(2^3).
20 //  4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM),
21 //     which is a polynomial evaluation in GF(2^128).
22 //
23 // All of them use an irreducible generating polynomial of degree m,
24 //
25 //    c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0
26 //
27 // where each coefficient c is can take values in GF(2^n), where 2^n is termed
28 // the order of the Galois field. For GF(2), each coefficient can take values
29 // either 0 or 1, and the polynomial is simply represented by m+1 bits,
30 // corresponding to the coefficients. The different variants of CRC are named by
31 // degree of generating polynomial used: so CRC-32 would use a polynomial of
32 // degree 32.
33 //
34 // The reason algorithms on GF(2^n) can be optimized with a lookup-table is the
35 // following: in such fields, polynomial addition and subtraction are identical
36 // and equivalent to XOR, polynomial multiplication is an AND, and polynomial
37 // division is identity: the XOR and AND operations in unoptimized
38 // implementations are performed bit-wise, and can be optimized to be performed
39 // chunk-wise, by interleaving copies of the generating polynomial, and storing
40 // the pre-computed values in a table.
41 //
42 // A generating polynomial of m bits always has the MSB set, so we usually
43 // omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial:
44 //
45 //   (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021
46 //
47 // Transmissions are either in big-endian or little-endian form, and hash
48 // algorithms are written according to this. For example, IEEE 802 and RS-232
49 // specify little-endian transmission.
50 //
51 //===----------------------------------------------------------------------===//
52 //
53 // At the moment, we only recognize the CRC algorithm.
54 // Documentation on CRC32 from the kernel:
55 // https://www.kernel.org/doc/Documentation/crc32.txt
56 //
57 //
58 //===----------------------------------------------------------------------===//
59 
60 #include "llvm/Analysis/HashRecognize.h"
61 #include "llvm/ADT/APInt.h"
62 #include "llvm/Analysis/LoopAnalysisManager.h"
63 #include "llvm/Analysis/LoopInfo.h"
64 #include "llvm/Analysis/ScalarEvolution.h"
65 #include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
66 #include "llvm/Analysis/ValueTracking.h"
67 #include "llvm/IR/PatternMatch.h"
68 #include "llvm/Support/KnownBits.h"
69 
70 using namespace llvm;
71 using namespace PatternMatch;
72 using namespace SCEVPatternMatch;
73 
74 #define DEBUG_TYPE "hash-recognize"
75 
76 // KnownBits for a PHI node. There are at most two PHI nodes, corresponding to
77 // the Simple Recurrence and Conditional Recurrence. The IndVar PHI is not
78 // relevant.
79 using KnownPhiMap = SmallDenseMap<const PHINode *, KnownBits, 2>;
80 
81 // A pair of a PHI node along with its incoming value from within a loop.
82 using PhiStepPair = std::pair<const PHINode *, const Instruction *>;
83 
84 /// A much simpler version of ValueTracking, in that it computes KnownBits of
85 /// values, except that it computes the evolution of KnownBits in a loop with a
86 /// given trip count, and predication is specialized for a significant-bit
87 /// check.
88 class ValueEvolution {
89   const unsigned TripCount;
90   const bool ByteOrderSwapped;
91   APInt GenPoly;
92   StringRef ErrStr;
93 
94   // Compute the KnownBits of a BinaryOperator.
95   KnownBits computeBinOp(const BinaryOperator *I);
96 
97   // Compute the KnownBits of an Instruction.
98   KnownBits computeInstr(const Instruction *I);
99 
100   // Compute the KnownBits of a Value.
101   KnownBits compute(const Value *V);
102 
103 public:
104   // ValueEvolution is meant to be constructed with the TripCount of the loop,
105   // and whether the polynomial algorithm is big-endian, for the significant-bit
106   // check.
107   ValueEvolution(unsigned TripCount, bool ByteOrderSwapped);
108 
109   // Given a list of PHI nodes along with their incoming value from within the
110   // loop, computeEvolutions computes the KnownBits of each of the PHI nodes on
111   // the final iteration. Returns true on success and false on error.
112   bool computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions);
113 
114   // In case ValueEvolution encounters an error, this is meant to be used for a
115   // precise error message.
116   StringRef getError() const { return ErrStr; }
117 
118   // The computed KnownBits for each PHI node, which is populated after
119   // computeEvolutions is called.
120   KnownPhiMap KnownPhis;
121 };
122 
123 ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped)
124     : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {}
125 
126 KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I) {
127   KnownBits KnownL(compute(I->getOperand(0)));
128   KnownBits KnownR(compute(I->getOperand(1)));
129 
130   switch (I->getOpcode()) {
131   case Instruction::BinaryOps::And:
132     return KnownL & KnownR;
133   case Instruction::BinaryOps::Or:
134     return KnownL | KnownR;
135   case Instruction::BinaryOps::Xor:
136     return KnownL ^ KnownR;
137   case Instruction::BinaryOps::Shl: {
138     auto *OBO = cast<OverflowingBinaryOperator>(I);
139     return KnownBits::shl(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
140                           OBO->hasNoSignedWrap());
141   }
142   case Instruction::BinaryOps::LShr:
143     return KnownBits::lshr(KnownL, KnownR);
144   case Instruction::BinaryOps::AShr:
145     return KnownBits::ashr(KnownL, KnownR);
146   case Instruction::BinaryOps::Add: {
147     auto *OBO = cast<OverflowingBinaryOperator>(I);
148     return KnownBits::add(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
149                           OBO->hasNoSignedWrap());
150   }
151   case Instruction::BinaryOps::Sub: {
152     auto *OBO = cast<OverflowingBinaryOperator>(I);
153     return KnownBits::sub(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
154                           OBO->hasNoSignedWrap());
155   }
156   case Instruction::BinaryOps::Mul: {
157     Value *Op0 = I->getOperand(0);
158     Value *Op1 = I->getOperand(1);
159     bool SelfMultiply = Op0 == Op1 && isGuaranteedNotToBeUndef(Op0);
160     return KnownBits::mul(KnownL, KnownR, SelfMultiply);
161   }
162   case Instruction::BinaryOps::UDiv:
163     return KnownBits::udiv(KnownL, KnownR);
164   case Instruction::BinaryOps::SDiv:
165     return KnownBits::sdiv(KnownL, KnownR);
166   case Instruction::BinaryOps::URem:
167     return KnownBits::urem(KnownL, KnownR);
168   case Instruction::BinaryOps::SRem:
169     return KnownBits::srem(KnownL, KnownR);
170   default:
171     ErrStr = "Unknown BinaryOperator";
172     unsigned BitWidth = I->getType()->getScalarSizeInBits();
173     return {BitWidth};
174   }
175 }
176 
177 KnownBits ValueEvolution::computeInstr(const Instruction *I) {
178   unsigned BitWidth = I->getType()->getScalarSizeInBits();
179 
180   // We look up in the map that contains the KnownBits of the PHI from the
181   // previous iteration.
182   if (const PHINode *P = dyn_cast<PHINode>(I))
183     return KnownPhis.lookup_or(P, BitWidth);
184 
185   // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch
186   // that is predicated on the (least|most)-significant-bit check.
187   CmpPredicate Pred;
188   Value *L, *R, *TV, *FV;
189   if (match(I, m_Select(m_ICmp(Pred, m_Value(L), m_Value(R)), m_Value(TV),
190                         m_Value(FV)))) {
191     // We need to check LCR against [0, 2) in the little-endian case, because
192     // the RCR check is insufficient: it is simply [0, 1).
193     if (!ByteOrderSwapped) {
194       KnownBits KnownL = compute(L);
195       unsigned ICmpBW = KnownL.getBitWidth();
196       auto LCR = ConstantRange::fromKnownBits(KnownL, false);
197       auto CheckLCR = ConstantRange(APInt::getZero(ICmpBW), APInt(ICmpBW, 2));
198       if (LCR != CheckLCR) {
199         ErrStr = "Bad LHS of significant-bit-check";
200         return {BitWidth};
201       }
202     }
203 
204     // Check that the predication is on (most|least) significant bit.
205     KnownBits KnownR = compute(R);
206     unsigned ICmpBW = KnownR.getBitWidth();
207     auto RCR = ConstantRange::fromKnownBits(KnownR, false);
208     auto AllowedR = ConstantRange::makeAllowedICmpRegion(Pred, RCR);
209     ConstantRange CheckRCR(APInt::getZero(ICmpBW),
210                            ByteOrderSwapped ? APInt::getSignedMinValue(ICmpBW)
211                                             : APInt(ICmpBW, 1));
212     if (AllowedR == CheckRCR)
213       return compute(TV);
214     if (AllowedR.inverse() == CheckRCR)
215       return compute(FV);
216 
217     ErrStr = "Bad RHS of significant-bit-check";
218     return {BitWidth};
219   }
220 
221   if (auto *BO = dyn_cast<BinaryOperator>(I))
222     return computeBinOp(BO);
223 
224   switch (I->getOpcode()) {
225   case Instruction::CastOps::Trunc:
226     return compute(I->getOperand(0)).trunc(BitWidth);
227   case Instruction::CastOps::ZExt:
228     return compute(I->getOperand(0)).zext(BitWidth);
229   case Instruction::CastOps::SExt:
230     return compute(I->getOperand(0)).sext(BitWidth);
231   default:
232     ErrStr = "Unknown Instruction";
233     return {BitWidth};
234   }
235 }
236 
237 KnownBits ValueEvolution::compute(const Value *V) {
238   if (auto *CI = dyn_cast<ConstantInt>(V))
239     return KnownBits::makeConstant(CI->getValue());
240 
241   if (auto *I = dyn_cast<Instruction>(V))
242     return computeInstr(I);
243 
244   ErrStr = "Unknown Value";
245   unsigned BitWidth = V->getType()->getScalarSizeInBits();
246   return {BitWidth};
247 }
248 
249 bool ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) {
250   for (unsigned I = 0; I < TripCount; ++I)
251     for (auto [Phi, Step] : PhiEvolutions)
252       KnownPhis.emplace_or_assign(Phi, computeInstr(Step));
253 
254   return ErrStr.empty();
255 }
256 
257 /// A structure that can hold either a Simple Recurrence or a Conditional
258 /// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand
259 /// of the BO, while in a Conditional Recurrence, it is a SelectInst.
260 struct RecurrenceInfo {
261   const Loop &L;
262   const PHINode *Phi = nullptr;
263   BinaryOperator *BO = nullptr;
264   Value *Start = nullptr;
265   Value *Step = nullptr;
266   std::optional<APInt> ExtraConst;
267 
268   RecurrenceInfo(const Loop &L) : L(L) {}
269   operator bool() const { return BO; }
270 
271   void print(raw_ostream &OS, unsigned Indent = 0) const {
272     OS.indent(Indent) << "Phi: ";
273     Phi->print(OS);
274     OS << "\n";
275     OS.indent(Indent) << "BinaryOperator: ";
276     BO->print(OS);
277     OS << "\n";
278     OS.indent(Indent) << "Start: ";
279     Start->print(OS);
280     OS << "\n";
281     OS.indent(Indent) << "Step: ";
282     Step->print(OS);
283     OS << "\n";
284     if (ExtraConst) {
285       OS.indent(Indent) << "ExtraConst: ";
286       ExtraConst->print(OS, false);
287       OS << "\n";
288     }
289   }
290 
291 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
292   LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
293 #endif
294 
295   bool matchSimpleRecurrence(const PHINode *P);
296   bool matchConditionalRecurrence(
297       const PHINode *P,
298       Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
299 
300 private:
301   BinaryOperator *digRecurrence(
302       Instruction *V,
303       Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
304 };
305 
306 /// Wraps llvm::matchSimpleRecurrence. Match a simple first order recurrence
307 /// cycle of the form:
308 ///
309 /// loop:
310 ///    %rec = phi [%start, %entry], [%BO, %loop]
311 ///     ...
312 ///     %BO = binop %rec, %step
313 ///
314 /// or
315 ///
316 /// loop:
317 ///    %rec = phi [%start, %entry], [%BO, %loop]
318 ///    ...
319 ///    %BO = binop %step, %rec
320 ///
321 bool RecurrenceInfo::matchSimpleRecurrence(const PHINode *P) {
322   Phi = P;
323   return llvm::matchSimpleRecurrence(Phi, BO, Start, Step);
324 }
325 
326 /// Digs for a recurrence starting with \p V hitting the PHI node in a use-def
327 /// chain. Used by matchConditionalRecurrence.
328 BinaryOperator *
329 RecurrenceInfo::digRecurrence(Instruction *V,
330                               Instruction::BinaryOps BOWithConstOpToMatch) {
331   SmallVector<Instruction *> Worklist;
332   Worklist.push_back(V);
333   while (!Worklist.empty()) {
334     Instruction *I = Worklist.pop_back_val();
335 
336     // Don't add a PHI's operands to the Worklist.
337     if (isa<PHINode>(I))
338       continue;
339 
340     // Find a recurrence over a BinOp, by matching either of its operands
341     // with with the PHINode.
342     if (match(I, m_c_BinOp(m_Value(), m_Specific(Phi))))
343       return cast<BinaryOperator>(I);
344 
345     // Bind to ExtraConst, if we match exactly one.
346     if (I->getOpcode() == BOWithConstOpToMatch) {
347       if (ExtraConst)
348         return nullptr;
349       const APInt *C = nullptr;
350       if (match(I, m_c_BinOp(m_APInt(C), m_Value())))
351         ExtraConst = *C;
352     }
353 
354     // Continue along the use-def chain.
355     for (Use &U : I->operands())
356       if (auto *UI = dyn_cast<Instruction>(U))
357         if (L.contains(UI))
358           Worklist.push_back(UI);
359   }
360   return nullptr;
361 }
362 
363 /// A Conditional Recurrence is a recurrence of the form:
364 ///
365 /// loop:
366 ///    %rec = phi [%start, %entry], [%step, %loop]
367 ///    ...
368 ///    %step = select _, %tv, %fv
369 ///
370 /// where %tv and %fv ultimately end up using %rec via the same %BO instruction,
371 /// after digging through the use-def chain.
372 ///
373 /// ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when digging
374 /// the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is matched,
375 /// and ExtraConst is a constant operand of that BinOp. This peculiarity exists,
376 /// because in a CRC algorithm, the \p BOWithConstOpToMatch is an XOR, and the
377 /// ExtraConst ends up being the generating polynomial.
378 bool RecurrenceInfo::matchConditionalRecurrence(
379     const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) {
380   Phi = P;
381   if (Phi->getNumIncomingValues() != 2)
382     return false;
383 
384   for (unsigned Idx = 0; Idx != 2; ++Idx) {
385     Value *FoundStep = Phi->getIncomingValue(Idx);
386     Value *FoundStart = Phi->getIncomingValue(!Idx);
387 
388     Instruction *TV, *FV;
389     if (!match(FoundStep,
390                m_Select(m_Cmp(), m_Instruction(TV), m_Instruction(FV))))
391       continue;
392 
393     // For a conditional recurrence, both the true and false values of the
394     // select must ultimately end up in the same recurrent BinOp.
395     BinaryOperator *FoundBO = digRecurrence(TV, BOWithConstOpToMatch);
396     BinaryOperator *AltBO = digRecurrence(FV, BOWithConstOpToMatch);
397     if (!FoundBO || FoundBO != AltBO)
398       return false;
399 
400     if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) {
401       LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp "
402                            "with constant in conditional recurrence\n");
403       return false;
404     }
405 
406     BO = FoundBO;
407     Start = FoundStart;
408     Step = FoundStep;
409     return true;
410   }
411   return false;
412 }
413 
414 /// Iterates over all the phis in \p LoopLatch, and attempts to extract a
415 /// Conditional Recurrence and an optional Simple Recurrence.
416 static std::optional<std::pair<RecurrenceInfo, RecurrenceInfo>>
417 getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) {
418   auto Phis = LoopLatch->phis();
419   unsigned NumPhis = std::distance(Phis.begin(), Phis.end());
420   if (NumPhis != 2 && NumPhis != 3)
421     return {};
422 
423   RecurrenceInfo SimpleRecurrence(L);
424   RecurrenceInfo ConditionalRecurrence(L);
425   for (PHINode &P : Phis) {
426     if (&P == IndVar)
427       continue;
428     if (!SimpleRecurrence)
429       SimpleRecurrence.matchSimpleRecurrence(&P);
430     if (!ConditionalRecurrence)
431       ConditionalRecurrence.matchConditionalRecurrence(
432           &P, Instruction::BinaryOps::Xor);
433   }
434   if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence))
435     return {};
436   return std::make_pair(SimpleRecurrence, ConditionalRecurrence);
437 }
438 
439 PolynomialInfo::PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS,
440                                Value *ComputedValue, bool ByteOrderSwapped,
441                                Value *LHSAux)
442     : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue),
443       ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {}
444 
445 /// In the big-endian case, checks the bottom N bits against CheckFn, and that
446 /// the rest are unknown. In the little-endian case, checks the top N bits
447 /// against CheckFn, and that the rest are unknown. Callers usually call this
448 /// function with N = TripCount, and CheckFn checking that the remainder bits of
449 /// the CRC polynomial division are zero.
450 static bool checkExtractBits(const KnownBits &Known, unsigned N,
451                              function_ref<bool(const KnownBits &)> CheckFn,
452                              bool ByteOrderSwapped) {
453   // Check that the entire thing is a constant.
454   if (N == Known.getBitWidth())
455     return CheckFn(Known.extractBits(N, 0));
456 
457   // Check that the {top, bottom} N bits are not unknown and that the {bottom,
458   // top} N bits are known.
459   unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N;
460   unsigned SwappedBitPos = ByteOrderSwapped ? N : 0;
461   return CheckFn(Known.extractBits(N, BitPos)) &&
462          Known.extractBits(Known.getBitWidth() - N, SwappedBitPos).isUnknown();
463 }
464 
465 /// Generate a lookup table of 256 entries by interleaving the generating
466 /// polynomial. The optimization technique of table-lookup for CRC is also
467 /// called the Sarwate algorithm.
468 CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
469                                         bool ByteOrderSwapped) {
470   unsigned BW = GenPoly.getBitWidth();
471   CRCTable Table;
472   Table[0] = APInt::getZero(BW);
473 
474   if (ByteOrderSwapped) {
475     APInt CRCInit = APInt::getSignedMinValue(BW);
476     for (unsigned I = 1; I < 256; I <<= 1) {
477       CRCInit = CRCInit.shl(1) ^
478                 (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(BW));
479       for (unsigned J = 0; J < I; ++J)
480         Table[I + J] = CRCInit ^ Table[J];
481     }
482     return Table;
483   }
484 
485   APInt CRCInit(BW, 1);
486   for (unsigned I = 128; I; I >>= 1) {
487     CRCInit = CRCInit.lshr(1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(BW));
488     for (unsigned J = 0; J < 256; J += (I << 1))
489       Table[I + J] = CRCInit ^ Table[J];
490   }
491   return Table;
492 }
493 
494 /// Checks that \p P1 and \p P2 are used together in an XOR in the use-def chain
495 /// of \p SI's condition, ignoring any casts. The purpose of this function is to
496 /// ensure that LHSAux from the SimpleRecurrence is used correctly in the CRC
497 /// computation. We cannot check the correctness of casts at this point, and
498 /// rely on the KnownBits propagation to check correctness of the CRC
499 /// computation.
500 ///
501 /// In other words, it checks for the following pattern:
502 ///
503 /// loop:
504 ///   %P1 = phi [_, %entry], [%P1.next, %loop]
505 ///   %P2 = phi [_, %entry], [%P2.next, %loop]
506 ///   ...
507 ///   %xor = xor (CastOrSelf %P1), (CastOrSelf %P2)
508 ///
509 /// where %xor is in the use-def chain of \p SI's condition.
510 static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1,
511                                      const PHINode *P2, const Loop &L) {
512   SmallVector<const Instruction *> Worklist;
513 
514   // matchConditionalRecurrence has already ensured that the SelectInst's
515   // condition is an Instruction.
516   Worklist.push_back(cast<Instruction>(SI->getCondition()));
517 
518   while (!Worklist.empty()) {
519     const Instruction *I = Worklist.pop_back_val();
520 
521     // Don't add a PHI's operands to the Worklist.
522     if (isa<PHINode>(I))
523       continue;
524 
525     // If we match an XOR of the two PHIs ignoring casts, we're done.
526     if (match(I, m_c_Xor(m_CastOrSelf(m_Specific(P1)),
527                          m_CastOrSelf(m_Specific(P2)))))
528       return true;
529 
530     // Continue along the use-def chain.
531     for (const Use &U : I->operands())
532       if (auto *UI = dyn_cast<Instruction>(U))
533         if (L.contains(UI))
534           Worklist.push_back(UI);
535   }
536   return false;
537 }
538 
539 // Recognizes a multiplication or division by the constant two, using SCEV. By
540 // doing this, we're immune to whether the IR expression is mul/udiv or
541 // equivalently shl/lshr. Return false when it is a UDiv, true when it is a Mul,
542 // and std::nullopt otherwise.
543 static std::optional<bool> isBigEndianBitShift(Value *V, ScalarEvolution &SE) {
544   if (!V->getType()->isIntegerTy())
545     return {};
546 
547   const SCEV *E = SE.getSCEV(V);
548   if (match(E, m_scev_UDiv(m_SCEV(), m_scev_SpecificInt(2))))
549     return false;
550   if (match(E, m_scev_Mul(m_scev_SpecificInt(2), m_SCEV())))
551     return true;
552   return {};
553 }
554 
555 /// The main entry point for analyzing a loop and recognizing the CRC algorithm.
556 /// Returns a PolynomialInfo on success, and either an ErrBits or a StringRef on
557 /// failure.
558 std::variant<PolynomialInfo, ErrBits, StringRef>
559 HashRecognize::recognizeCRC() const {
560   if (!L.isInnermost())
561     return "Loop is not innermost";
562   BasicBlock *Latch = L.getLoopLatch();
563   BasicBlock *Exit = L.getExitBlock();
564   const PHINode *IndVar = L.getCanonicalInductionVariable();
565   if (!Latch || !Exit || !IndVar || L.getNumBlocks() != 1)
566     return "Loop not in canonical form";
567   unsigned TC = SE.getSmallConstantTripCount(&L);
568   if (!TC || TC > 256 || TC % 8)
569     return "Unable to find a small constant byte-multiple trip count";
570 
571   auto R = getRecurrences(Latch, IndVar, L);
572   if (!R)
573     return "Found stray PHI";
574   auto [SimpleRecurrence, ConditionalRecurrence] = *R;
575   if (!ConditionalRecurrence)
576     return "Unable to find conditional recurrence";
577 
578   // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv
579   // with two, or in other words, that they're single bit-shifts.
580   std::optional<bool> ByteOrderSwapped =
581       isBigEndianBitShift(ConditionalRecurrence.BO, SE);
582   if (!ByteOrderSwapped)
583     return "Loop with non-unit bitshifts";
584   if (SimpleRecurrence) {
585     if (isBigEndianBitShift(SimpleRecurrence.BO, SE) != ByteOrderSwapped)
586       return "Loop with non-unit bitshifts";
587 
588     // Ensure that the PHIs have exactly two uses:
589     // the bit-shift, and the XOR (or a cast feeding into the XOR).
590     if (!ConditionalRecurrence.Phi->hasNUses(2) ||
591         !SimpleRecurrence.Phi->hasNUses(2))
592       return "Recurrences have stray uses";
593 
594     // Check that the SelectInst ConditionalRecurrence.Step is conditional on
595     // the XOR of SimpleRecurrence.Phi and ConditionalRecurrence.Phi.
596     if (!isConditionalOnXorOfPHIs(cast<SelectInst>(ConditionalRecurrence.Step),
597                                   SimpleRecurrence.Phi,
598                                   ConditionalRecurrence.Phi, L))
599       return "Recurrences not intertwined with XOR";
600   }
601 
602   // Make sure that the TC doesn't exceed the bitwidth of LHSAux, or LHS.
603   Value *LHS = ConditionalRecurrence.Start;
604   Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr;
605   if (TC > (LHSAux ? LHSAux->getType()->getIntegerBitWidth()
606                    : LHS->getType()->getIntegerBitWidth()))
607     return "Loop iterations exceed bitwidth of data";
608 
609   // Make sure that the computed value is used in the exit block: this should be
610   // true even if it is only really used in an outer loop's exit block, since
611   // the loop is in LCSSA form.
612   auto *ComputedValue = cast<SelectInst>(ConditionalRecurrence.Step);
613   if (none_of(ComputedValue->users(), [Exit](User *U) {
614         auto *UI = dyn_cast<Instruction>(U);
615         return UI && UI->getParent() == Exit;
616       }))
617     return "Unable to find use of computed value in loop exit block";
618 
619   assert(ConditionalRecurrence.ExtraConst &&
620          "Expected ExtraConst in conditional recurrence");
621   const APInt &GenPoly = *ConditionalRecurrence.ExtraConst;
622 
623   // PhiEvolutions are pairs of PHINodes along with their incoming value from
624   // within the loop, which we term as their step. Note that in the case of a
625   // Simple Recurrence, Step is an operand of the BO, while in a Conditional
626   // Recurrence, it is a SelectInst.
627   SmallVector<PhiStepPair, 2> PhiEvolutions;
628   PhiEvolutions.emplace_back(ConditionalRecurrence.Phi, ComputedValue);
629   if (SimpleRecurrence)
630     PhiEvolutions.emplace_back(SimpleRecurrence.Phi, SimpleRecurrence.BO);
631 
632   ValueEvolution VE(TC, *ByteOrderSwapped);
633   if (!VE.computeEvolutions(PhiEvolutions))
634     return VE.getError();
635   KnownBits ResultBits = VE.KnownPhis.at(ConditionalRecurrence.Phi);
636 
637   unsigned N = std::min(TC, ResultBits.getBitWidth());
638   auto IsZero = [](const KnownBits &K) { return K.isZero(); };
639   if (!checkExtractBits(ResultBits, N, IsZero, *ByteOrderSwapped))
640     return ErrBits(ResultBits, TC, *ByteOrderSwapped);
641 
642   return PolynomialInfo(TC, LHS, GenPoly, ComputedValue, *ByteOrderSwapped,
643                         LHSAux);
644 }
645 
646 void CRCTable::print(raw_ostream &OS) const {
647   for (unsigned I = 0; I < 256; I++) {
648     (*this)[I].print(OS, false);
649     OS << (I % 16 == 15 ? '\n' : ' ');
650   }
651 }
652 
653 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
654 void CRCTable::dump() const { print(dbgs()); }
655 #endif
656 
657 void HashRecognize::print(raw_ostream &OS) const {
658   if (!L.isInnermost())
659     return;
660   OS << "HashRecognize: Checking a loop in '"
661      << L.getHeader()->getParent()->getName() << "' from " << L.getLocStr()
662      << "\n";
663   auto Ret = recognizeCRC();
664   if (!std::holds_alternative<PolynomialInfo>(Ret)) {
665     OS << "Did not find a hash algorithm\n";
666     if (std::holds_alternative<StringRef>(Ret))
667       OS << "Reason: " << std::get<StringRef>(Ret) << "\n";
668     if (std::holds_alternative<ErrBits>(Ret)) {
669       auto [Actual, Iter, ByteOrderSwapped] = std::get<ErrBits>(Ret);
670       OS << "Reason: Expected " << (ByteOrderSwapped ? "bottom " : "top ")
671          << Iter << " bits zero (";
672       Actual.print(OS);
673       OS << ")\n";
674     }
675     return;
676   }
677 
678   auto Info = std::get<PolynomialInfo>(Ret);
679   OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian ")
680      << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count "
681      << Info.TripCount << "\n";
682   OS.indent(2) << "Initial CRC: ";
683   Info.LHS->print(OS);
684   OS << "\n";
685   OS.indent(2) << "Generating polynomial: ";
686   Info.RHS.print(OS, false);
687   OS << "\n";
688   OS.indent(2) << "Computed CRC: ";
689   Info.ComputedValue->print(OS);
690   OS << "\n";
691   if (Info.LHSAux) {
692     OS.indent(2) << "Auxiliary data: ";
693     Info.LHSAux->print(OS);
694     OS << "\n";
695   }
696   OS.indent(2) << "Computed CRC lookup table:\n";
697   genSarwateTable(Info.RHS, Info.ByteOrderSwapped).print(OS);
698 }
699 
700 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
701 void HashRecognize::dump() const { print(dbgs()); }
702 #endif
703 
704 std::optional<PolynomialInfo> HashRecognize::getResult() const {
705   auto Res = HashRecognize(L, SE).recognizeCRC();
706   if (std::holds_alternative<PolynomialInfo>(Res))
707     return std::get<PolynomialInfo>(Res);
708   return std::nullopt;
709 }
710 
711 HashRecognize::HashRecognize(const Loop &L, ScalarEvolution &SE)
712     : L(L), SE(SE) {}
713 
714 PreservedAnalyses HashRecognizePrinterPass::run(Loop &L,
715                                                 LoopAnalysisManager &AM,
716                                                 LoopStandardAnalysisResults &AR,
717                                                 LPMUpdater &) {
718   HashRecognize(L, AR.SE).print(OS);
719   return PreservedAnalyses::all();
720 }
721