//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs strength reduction on array references inside loops that
// use the loop induction variable as one or more of their components. It
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>;
// however, it's useful to think about these as the same register, with some
// uses using the value of the register before the add and some using it after.
// In this example, the icmp is a post-increment user, since it uses %i.next,
// which is the value of the induction variable after the increment. The other
// common case of post-increment users is users outside the loop.
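//
// As an illustrative sketch of how this is handled (see
// ScalarEvolutionNormalization): for a use marked as post-increment, the
// post-increment expression {1,+,1}<%L> is normalized to the pre-increment
// form {0,+,1}<%L>, so both kinds of users can be costed against the same
// register.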
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
// multiple base registers, or as the increment expression in an addrec),
// we may not actually need both reg and (-1 * reg) in registers; the
// negation can be implemented by using a sub instead of an add. The
// lack of support for taking this into consideration when making
// register pressure decisions is partly worked around by the "Special"
// use kind.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but it catches
/// the worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;

// Clean up congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim(
    "enable-lsr-phielim", cl::Hidden, cl::init(true),
    cl::desc("Enable LSR phi elimination"));

// This flag adds the instruction count to the solution cost comparison.
static cl::opt<bool> InsnsCost(
    "lsr-insns-cost", cl::Hidden, cl::init(true),
    cl::desc("Add instruction count to the LSR cost model"));

// Flag to choose how to narrow complex LSR solutions.
static cl::opt<bool> LSRExpNarrow(
    "lsr-exp-narrow", cl::Hidden, cl::init(false),
    cl::desc("Narrow LSR complex solution using the expected"
             " number of registers"));

// Flag to narrow search space by filtering non-optimal formulae with
// the same ScaledReg and Scale.
static cl::opt<bool> FilterSameScaledReg(
    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
    "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
    cl::desc("A flag that overrides the target's preferred addressing mode."),
    cl::values(clEnumValN(TTI::AMK_None,
                          "none",
                          "Don't prefer any addressing mode"),
               clEnumValN(TTI::AMK_PreIndexed,
                          "preindexed",
                          "Prefer pre-indexed addressing mode"),
               clEnumValN(TTI::AMK_PostIndexed,
                          "postindexed",
                          "Prefer post-indexed addressing mode")));

static cl::opt<unsigned> ComplexityLimit(
    "lsr-complexity-limit", cl::Hidden,
    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

static cl::opt<unsigned> SetupCostDepthLimit(
    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
    cl::desc("The limit on recursion depth for LSR's setup cost"));

static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
    "lsr-term-fold", cl::Hidden,
    cl::desc("Attempt to replace the primary IV with another IV."));

static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
    "lsr-drop-solution", cl::Hidden,
    cl::desc("Attempt to drop the solution if it is less profitable"));

static cl::opt<bool> EnableVScaleImmediates(
    "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true),
    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

static cl::opt<bool> DropScaledForVScale(
    "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

STATISTIC(NumTermFold,
          "Number of terminating condition folds recognized and performed");

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
    "stress-ivchain", cl::Hidden, cl::init(false),
    cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif

namespace {

struct MemAccessTy {
  /// Used in situations where the accessed memory type is unknown.
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  Type *getType() { return MemTy; }
};
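
// Example (illustrative, not used by the pass itself): a plain i32 access in
// address space 0 would be described as
//   MemAccessTy(Type::getInt32Ty(Ctx), /*AS=*/0)
// while MemAccessTy::getUnknown(Ctx) stands in when the access type cannot be
// determined.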

/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  void dump() const;
};

// An offset from an address that is either scalable or fixed. Used for
// per-target optimizations of addressing modes.
class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

public:
  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  // Arithmetic 'operators' that cast to unsigned types first.
  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  // Scale the quantity by a constant without caring about runtime scalability.
  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }

  // Helpers for generating SCEVs with vscale terms where needed.
  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *S = SE.getConstant(Ty, Quantity);
    if (Scalable)
      S = SE.getMulExpr(S, SE.getVScale(S->getType()));
    return S;
  }

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
    if (Scalable)
      NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
    return NegS;
  }

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
    if (Scalable)
      SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
    return SU;
  }
};
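
// Example (illustrative): Immediate::getFixed(16) models a plain "+16 bytes"
// offset, whereas Immediate::getScalable(16) models "+16 * vscale bytes", as
// arises with scalable-vector addressing. getSCEV() above materializes the
// latter as the SCEV (16 * vscale), so the rest of LSR can reason about both
// kinds of offset uniformly.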

// This is needed for the Compare type of std::map when Immediate is used
// as a key. We don't need it to be fully correct against any value of vscale,
// just to make sure that vscale-related terms in the map are considered against
// each other rather than being mixed up and potentially missing opportunities.
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};

// This would be nicer if we could be generic instead of directly using size_t,
// but there doesn't seem to be a type trait for is_orderable or
// is_lessthan_comparable or similar.
struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};
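
// Example (illustrative): both comparators are intended as the Compare
// parameter of std::map, e.g.
//   std::map<Immediate, unsigned, KeyOrderTargetImmediate> OffsetCounts;
// which orders all fixed offsets before all scalable ones, so entries that
// differ only in scalability are never conflated.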
} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}

LLVM_DUMP_METHOD void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

/// Map register candidates to information about how they are used.
class RegUseTracker {
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;
  SmallVector<const SCEV *, 16> RegSequence;

public:
  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  void clear();

  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end() { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const { return RegSequence.end(); }
};

} // end anonymous namespace

void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  if (Pair.second)
    RegSequence.push_back(Reg);
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

void
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  // Update RegUses. The data structure is not optimized for this purpose;
  // we must iterate through it and update each of the bit vectors.
  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
  RegUsesMap.clear();
  RegSequence.clear();
}

namespace {

/// This class holds information that describes a formula for computing a
/// value satisfying a use. It may include broken-out immediates and scaled
/// registers.
struct Formula {
  /// Global base address used for complex addressing.
  GlobalValue *BaseGV = nullptr;

  /// Base offset for complex addressing.
  Immediate BaseOffset = Immediate::getZero();

  /// Whether any complex addressing has a base register.
  bool HasBaseReg = false;

  /// The scale of any complex addressing.
  int64_t Scale = 0;

  /// The list of "base" registers for this use. When this is non-empty, the
  /// canonical representation of a formula is
  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
  /// 3. The reg containing the recurrent expr related to the current loop in
  ///    the formula should be put in the ScaledReg.
  /// #1 enforces that the scaled register is always used when at least two
  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
  /// #2 enforces that 1 * reg is reg.
  /// #3 ensures invariant regs with respect to the current loop can be
  /// combined together in LSR codegen.
  /// This invariant can be temporarily broken while building a formula.
  /// However, every formula inserted into the LSRInstance must be in canonical
  /// form.
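  ///
  /// For example (illustrative): an expression reg1 + reg2 + 4 is stored in
  /// canonical form as BaseRegs = {reg1}, ScaledReg = reg2, Scale = 1,
  /// BaseOffset = 4, never as BaseRegs = {reg1, reg2}; which register lands
  /// in ScaledReg is further constrained by #3 above.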
  SmallVector<const SCEV *, 4> BaseRegs;

  /// The 'scaled' register for this use. This should be non-null when Scale is
  /// not zero.
  const SCEV *ScaledReg = nullptr;

  /// An additional constant offset which is added near the use. This requires
  /// a temporary register, but the offset itself can live in an add immediate
  /// field rather than a register.
  Immediate UnfoldedOffset = Immediate::getZero();

  Formula() = default;

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  bool isCanonical(const Loop &L) const;

  void canonicalize(const Loop &L);

  bool unscale();

  bool hasZeroEnd() const;

  size_t getNumRegs() const;
  Type *getType() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE) {
  // Collect expressions which properly dominate the loop header.
  if (SE.properlyDominates(S, L->getHeader())) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands())
      DoInitialMatch(S, L, Good, Bad, SE);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    if (!AR->getStart()->isZero() && AR->isAffine()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
      DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                      AR->getStepRecurrence(SE),
                                      // FIXME: AR->getNoWrapFlags()
                                      AR->getLoop(), SCEV::FlagAnyWrap),
                     L, Good, Bad, SE);
      return;
    }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
          SE.getEffectiveSCEVType(NewMul->getType())));
      for (const SCEV *S : MyGood)
        Good.push_back(SE.getMulExpr(NegOne, S));
      for (const SCEV *S : MyBad)
        Bad.push_back(SE.getMulExpr(NegOne, S));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// Incorporate loop-variant parts of S into this Formula, attempting to keep
/// all loop-invariant and loop-computable values in a single base register.
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE);
  if (!Good.empty()) {
    const SCEV *Sum = SE.getAddExpr(Good);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  if (!Bad.empty()) {
    const SCEV *Sum = SE.getAddExpr(Bad);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  canonicalize(*L);
}

static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
  return SCEVExprContains(S, [&L](const SCEV *S) {
    return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
  });
}

/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
bool Formula::isCanonical(const Loop &L) const {
  if (!ScaledReg)
    return BaseRegs.size() <= 1;

  if (Scale != 1)
    return true;

  if (Scale == 1 && BaseRegs.empty())
    return false;

  if (containsAddRecDependentOnLoop(ScaledReg, L))
    return true;

  // If ScaledReg is not a recurrent expr, or it is one but its loop is not the
  // current loop, while BaseRegs contains a recurrent expr reg related to the
  // current loop, we want to swap the reg in BaseRegs with ScaledReg.
  return none_of(BaseRegs, [&L](const SCEV *S) {
    return containsAddRecDependentOnLoop(S, L);
  });
}

/// Helper method to morph a formula into its canonical representation.
/// \see Formula::BaseRegs.
/// Every formula having more than one base register must use the ScaledReg
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
void Formula::canonicalize(const Loop &L) {
  if (isCanonical(L))
    return;

  if (BaseRegs.empty()) {
    // No base reg? Use the scaled reg (with Scale = 1) as the base reg.
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);
    Scale = 0;
    ScaledReg = nullptr;
    return;
  }

  // Keep the invariant sum in BaseRegs and one of the variant sums in
  // ScaledReg.
  if (!ScaledReg) {
    ScaledReg = BaseRegs.pop_back_val();
    Scale = 1;
  }

  // If ScaledReg is an invariant with respect to L, find the reg from
  // BaseRegs containing the recurrent expr related to Loop L. Swap the
  // reg with ScaledReg.
  if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
    auto I = find_if(BaseRegs, [&L](const SCEV *S) {
      return containsAddRecDependentOnLoop(S, L);
    });
    if (I != BaseRegs.end())
      std::swap(ScaledReg, *I);
  }
  assert(isCanonical(L) && "Failed to canonicalize?");
}

/// Get rid of the scale in the formula.
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
bool Formula::unscale() {
  if (Scale != 1)
    return false;
  Scale = 0;
  BaseRegs.push_back(ScaledReg);
  ScaledReg = nullptr;
  return true;
}

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
    return false;
  if (BaseRegs.size() != 1 || ScaledReg)
    return false;
  return true;
}

/// Return the total number of register operands used by this formula. This
/// does not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// Return the type of this formula, if it has one, or null otherwise. This
/// type is meaningless except for the bit size.
Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
         nullptr;
}

/// Delete the given base reg from the BaseRegs list.
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
}

/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg || is_contained(BaseRegs, S);
}

/// Test whether this formula uses registers which are used by uses other than
/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (ScaledReg)
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
      return true;
  return false;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (BaseGV) {
    if (!First) OS << " + "; else First = false;
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
  }
  if (BaseOffset.isNonZero()) {
    if (!First) OS << " + "; else First = false;
    OS << BaseOffset;
  }
  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';
  }
  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }
  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
  }
}

LLVM_DUMP_METHOD void Formula::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Return true if the given addrec can be sign-extended without changing its
/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}

/// Return true if the given add can be sign-extended without changing its
/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}

/// Return true if the given mul can be sign-extended without changing its
/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(),
                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}

/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.
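///
/// For example (illustrative): when overflow can be ruled out,
/// getExactSDiv((8 + 4*%x), 4, SE) yields (2 + %x), while
/// getExactSDiv((9 + 4*%x), 4, SE) yields null because 9 /s 4 has a nonzero
/// remainder. With IgnoreSignificantBits, (%x * %y) /s %y folds to %x even
/// though the multiplication may have overflowed.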
getExactSDiv(const SCEV * LHS,const SCEV * RHS,ScalarEvolution & SE,bool IgnoreSignificantBits=false)8240b57cec5SDimitry Andric static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
8250b57cec5SDimitry Andric ScalarEvolution &SE,
8260b57cec5SDimitry Andric bool IgnoreSignificantBits = false) {
8270b57cec5SDimitry Andric // Handle the trivial case, which works for any SCEV type.
8280b57cec5SDimitry Andric if (LHS == RHS)
8290b57cec5SDimitry Andric return SE.getConstant(LHS->getType(), 1);
8300b57cec5SDimitry Andric
8310b57cec5SDimitry Andric // Handle a few RHS special cases.
8320b57cec5SDimitry Andric const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
8330b57cec5SDimitry Andric if (RC) {
8340b57cec5SDimitry Andric const APInt &RA = RC->getAPInt();
8350b57cec5SDimitry Andric // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
8360b57cec5SDimitry Andric // some folding.
837349cc55cSDimitry Andric if (RA.isAllOnes()) {
838fe6060f1SDimitry Andric if (LHS->getType()->isPointerTy())
839fe6060f1SDimitry Andric return nullptr;
8400b57cec5SDimitry Andric return SE.getMulExpr(LHS, RC);
841fe6060f1SDimitry Andric }
8420b57cec5SDimitry Andric // Handle x /s 1 as x.
8430b57cec5SDimitry Andric if (RA == 1)
8440b57cec5SDimitry Andric return LHS;
8450b57cec5SDimitry Andric }
8460b57cec5SDimitry Andric
8470b57cec5SDimitry Andric // Check for a division of a constant by a constant.
8480b57cec5SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
8490b57cec5SDimitry Andric if (!RC)
8500b57cec5SDimitry Andric return nullptr;
8510b57cec5SDimitry Andric const APInt &LA = C->getAPInt();
8520b57cec5SDimitry Andric const APInt &RA = RC->getAPInt();
8530b57cec5SDimitry Andric if (LA.srem(RA) != 0)
8540b57cec5SDimitry Andric return nullptr;
8550b57cec5SDimitry Andric return SE.getConstant(LA.sdiv(RA));
8560b57cec5SDimitry Andric }
8570b57cec5SDimitry Andric
8580b57cec5SDimitry Andric // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
8590b57cec5SDimitry Andric if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
8600b57cec5SDimitry Andric if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
8610b57cec5SDimitry Andric const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
8620b57cec5SDimitry Andric IgnoreSignificantBits);
8630b57cec5SDimitry Andric if (!Step) return nullptr;
8640b57cec5SDimitry Andric const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
8650b57cec5SDimitry Andric IgnoreSignificantBits);
8660b57cec5SDimitry Andric if (!Start) return nullptr;
8670b57cec5SDimitry Andric // FlagNW is independent of the start value, step direction, and is
8680b57cec5SDimitry Andric // preserved with smaller magnitude steps.
8690b57cec5SDimitry Andric // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
8700b57cec5SDimitry Andric return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
8710b57cec5SDimitry Andric }
8720b57cec5SDimitry Andric return nullptr;
8730b57cec5SDimitry Andric }
8740b57cec5SDimitry Andric
8750b57cec5SDimitry Andric // Distribute the sdiv over add operands, if the add doesn't overflow.
8760b57cec5SDimitry Andric if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
8770b57cec5SDimitry Andric if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
8780b57cec5SDimitry Andric SmallVector<const SCEV *, 8> Ops;
8790b57cec5SDimitry Andric for (const SCEV *S : Add->operands()) {
8800b57cec5SDimitry Andric const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
8810b57cec5SDimitry Andric if (!Op) return nullptr;
8820b57cec5SDimitry Andric Ops.push_back(Op);
8830b57cec5SDimitry Andric }
8840b57cec5SDimitry Andric return SE.getAddExpr(Ops);
8850b57cec5SDimitry Andric }
8860b57cec5SDimitry Andric return nullptr;
8870b57cec5SDimitry Andric }
8880b57cec5SDimitry Andric
8890b57cec5SDimitry Andric // Check for a multiply operand that we can pull RHS out of.
8900b57cec5SDimitry Andric if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
8910b57cec5SDimitry Andric if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
892fe6060f1SDimitry Andric // Handle special case C1*X*Y /s C2*X*Y.
893fe6060f1SDimitry Andric if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
894fe6060f1SDimitry Andric if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
895fe6060f1SDimitry Andric const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
896fe6060f1SDimitry Andric const SCEVConstant *RC =
897fe6060f1SDimitry Andric dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
898fe6060f1SDimitry Andric if (LC && RC) {
899fe6060f1SDimitry Andric SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
900fe6060f1SDimitry Andric SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
901fe6060f1SDimitry Andric if (LOps == ROps)
902fe6060f1SDimitry Andric return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
903fe6060f1SDimitry Andric }
904fe6060f1SDimitry Andric }
905fe6060f1SDimitry Andric }
906fe6060f1SDimitry Andric
9070b57cec5SDimitry Andric SmallVector<const SCEV *, 4> Ops;
9080b57cec5SDimitry Andric bool Found = false;
9090b57cec5SDimitry Andric for (const SCEV *S : Mul->operands()) {
9100b57cec5SDimitry Andric if (!Found)
9110b57cec5SDimitry Andric if (const SCEV *Q = getExactSDiv(S, RHS, SE,
9120b57cec5SDimitry Andric IgnoreSignificantBits)) {
9130b57cec5SDimitry Andric S = Q;
9140b57cec5SDimitry Andric Found = true;
9150b57cec5SDimitry Andric }
9160b57cec5SDimitry Andric Ops.push_back(S);
9170b57cec5SDimitry Andric }
9180b57cec5SDimitry Andric return Found ? SE.getMulExpr(Ops) : nullptr;
9190b57cec5SDimitry Andric }
9200b57cec5SDimitry Andric return nullptr;
9210b57cec5SDimitry Andric }
9220b57cec5SDimitry Andric
9230b57cec5SDimitry Andric // Otherwise we don't know.
9240b57cec5SDimitry Andric return nullptr;
9250b57cec5SDimitry Andric }
9260b57cec5SDimitry Andric
9270b57cec5SDimitry Andric /// If S involves the addition of a constant integer value, return that integer
9280b57cec5SDimitry Andric /// value, and mutate S to point to a new SCEV with that value excluded.
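/// For example, given S = (42 + %x) this returns a fixed immediate of 42 and
/// rewrites S to %x; when EnableVScaleImmediates is set, S = (8 * vscale)
/// returns a scalable immediate of 8 and rewrites S to 0.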
9290fca6ea1SDimitry Andric static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
9300b57cec5SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
93106c3fb27SDimitry Andric if (C->getAPInt().getSignificantBits() <= 64) {
9320b57cec5SDimitry Andric S = SE.getConstant(C->getType(), 0);
9330fca6ea1SDimitry Andric return Immediate::getFixed(C->getValue()->getSExtValue());
9340b57cec5SDimitry Andric }
9350b57cec5SDimitry Andric } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
936e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(Add->operands());
9370fca6ea1SDimitry Andric Immediate Result = ExtractImmediate(NewOps.front(), SE);
9380fca6ea1SDimitry Andric if (Result.isNonZero())
9390b57cec5SDimitry Andric S = SE.getAddExpr(NewOps);
9400b57cec5SDimitry Andric return Result;
9410b57cec5SDimitry Andric } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
942e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(AR->operands());
9430fca6ea1SDimitry Andric Immediate Result = ExtractImmediate(NewOps.front(), SE);
9440fca6ea1SDimitry Andric if (Result.isNonZero())
9450b57cec5SDimitry Andric S = SE.getAddRecExpr(NewOps, AR->getLoop(),
9460b57cec5SDimitry Andric // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
9470b57cec5SDimitry Andric SCEV::FlagAnyWrap);
9480b57cec5SDimitry Andric return Result;
949*36b606aeSDimitry Andric } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
950*36b606aeSDimitry Andric if (EnableVScaleImmediates && M->getNumOperands() == 2) {
9510fca6ea1SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
9520fca6ea1SDimitry Andric if (isa<SCEVVScale>(M->getOperand(1))) {
9530fca6ea1SDimitry Andric S = SE.getConstant(M->getType(), 0);
9540fca6ea1SDimitry Andric return Immediate::getScalable(C->getValue()->getSExtValue());
9550b57cec5SDimitry Andric }
956*36b606aeSDimitry Andric }
957*36b606aeSDimitry Andric }
9580fca6ea1SDimitry Andric return Immediate::getZero();
9590b57cec5SDimitry Andric }
9600b57cec5SDimitry Andric
9610b57cec5SDimitry Andric /// If S involves the addition of a GlobalValue address, return that symbol, and
9620b57cec5SDimitry Andric /// mutate S to point to a new SCEV with that value excluded.
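/// For example, given S = (16 + @gv) this returns @gv and rewrites S to 16.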
9630b57cec5SDimitry Andric static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
9640b57cec5SDimitry Andric if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
9650b57cec5SDimitry Andric if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
9660b57cec5SDimitry Andric S = SE.getConstant(GV->getType(), 0);
9670b57cec5SDimitry Andric return GV;
9680b57cec5SDimitry Andric }
9690b57cec5SDimitry Andric } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
970e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(Add->operands());
9710b57cec5SDimitry Andric GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
9720b57cec5SDimitry Andric if (Result)
9730b57cec5SDimitry Andric S = SE.getAddExpr(NewOps);
9740b57cec5SDimitry Andric return Result;
9750b57cec5SDimitry Andric } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
976e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(AR->operands());
9770b57cec5SDimitry Andric GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
9780b57cec5SDimitry Andric if (Result)
9790b57cec5SDimitry Andric S = SE.getAddRecExpr(NewOps, AR->getLoop(),
9800b57cec5SDimitry Andric // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
9810b57cec5SDimitry Andric SCEV::FlagAnyWrap);
9820b57cec5SDimitry Andric return Result;
9830b57cec5SDimitry Andric }
9840b57cec5SDimitry Andric return nullptr;
9850b57cec5SDimitry Andric }
9860b57cec5SDimitry Andric
9870b57cec5SDimitry Andric /// Returns true if the specified instruction is using the specified value as an
9880b57cec5SDimitry Andric /// address.
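/// For a store, only the pointer operand counts as an address use (the stored
/// value does not); for memcpy/memmove, both the source and the destination
/// pointer arguments count.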
9890b57cec5SDimitry Andric static bool isAddressUse(const TargetTransformInfo &TTI,
9900b57cec5SDimitry Andric Instruction *Inst, Value *OperandVal) {
9910b57cec5SDimitry Andric bool isAddress = isa<LoadInst>(Inst);
9920b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
9930b57cec5SDimitry Andric if (SI->getPointerOperand() == OperandVal)
9940b57cec5SDimitry Andric isAddress = true;
9950b57cec5SDimitry Andric } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
9960b57cec5SDimitry Andric // Addressing modes can also be folded into prefetches and a variety
9970b57cec5SDimitry Andric // of intrinsics.
9980b57cec5SDimitry Andric switch (II->getIntrinsicID()) {
9990b57cec5SDimitry Andric case Intrinsic::memset:
10000b57cec5SDimitry Andric case Intrinsic::prefetch:
10015ffd83dbSDimitry Andric case Intrinsic::masked_load:
10020b57cec5SDimitry Andric if (II->getArgOperand(0) == OperandVal)
10030b57cec5SDimitry Andric isAddress = true;
10040b57cec5SDimitry Andric break;
10055ffd83dbSDimitry Andric case Intrinsic::masked_store:
10065ffd83dbSDimitry Andric if (II->getArgOperand(1) == OperandVal)
10075ffd83dbSDimitry Andric isAddress = true;
10085ffd83dbSDimitry Andric break;
10090b57cec5SDimitry Andric case Intrinsic::memmove:
10100b57cec5SDimitry Andric case Intrinsic::memcpy:
10110b57cec5SDimitry Andric if (II->getArgOperand(0) == OperandVal ||
10120b57cec5SDimitry Andric II->getArgOperand(1) == OperandVal)
10130b57cec5SDimitry Andric isAddress = true;
10140b57cec5SDimitry Andric break;
10150b57cec5SDimitry Andric default: {
10160b57cec5SDimitry Andric MemIntrinsicInfo IntrInfo;
10170b57cec5SDimitry Andric if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
10180b57cec5SDimitry Andric if (IntrInfo.PtrVal == OperandVal)
10190b57cec5SDimitry Andric isAddress = true;
10200b57cec5SDimitry Andric }
10210b57cec5SDimitry Andric }
10220b57cec5SDimitry Andric }
10230b57cec5SDimitry Andric } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
10240b57cec5SDimitry Andric if (RMW->getPointerOperand() == OperandVal)
10250b57cec5SDimitry Andric isAddress = true;
10260b57cec5SDimitry Andric } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
10270b57cec5SDimitry Andric if (CmpX->getPointerOperand() == OperandVal)
10280b57cec5SDimitry Andric isAddress = true;
10290b57cec5SDimitry Andric }
10300b57cec5SDimitry Andric return isAddress;
10310b57cec5SDimitry Andric }
10320b57cec5SDimitry Andric
10330b57cec5SDimitry Andric /// Return the type of the memory being accessed.
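/// For example, for "store i32 %v, ptr addrspace(1) %p" this returns a MemTy
/// of i32 with AddrSpace 1.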
10340b57cec5SDimitry Andric static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
10350b57cec5SDimitry Andric Instruction *Inst, Value *OperandVal) {
103606c3fb27SDimitry Andric MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
103706c3fb27SDimitry Andric
103806c3fb27SDimitry Andric // First get the type of memory being accessed.
103906c3fb27SDimitry Andric if (Type *Ty = Inst->getAccessType())
104006c3fb27SDimitry Andric AccessTy.MemTy = Ty;
104106c3fb27SDimitry Andric
104206c3fb27SDimitry Andric // Then get the pointer address space.
10430b57cec5SDimitry Andric if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
10440b57cec5SDimitry Andric AccessTy.AddrSpace = SI->getPointerAddressSpace();
10450b57cec5SDimitry Andric } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
10460b57cec5SDimitry Andric AccessTy.AddrSpace = LI->getPointerAddressSpace();
10470b57cec5SDimitry Andric } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
10480b57cec5SDimitry Andric AccessTy.AddrSpace = RMW->getPointerAddressSpace();
10490b57cec5SDimitry Andric } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
10500b57cec5SDimitry Andric AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
10510b57cec5SDimitry Andric } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
10520b57cec5SDimitry Andric switch (II->getIntrinsicID()) {
10530b57cec5SDimitry Andric case Intrinsic::prefetch:
10540b57cec5SDimitry Andric case Intrinsic::memset:
10550b57cec5SDimitry Andric AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
10560b57cec5SDimitry Andric AccessTy.MemTy = OperandVal->getType();
10570b57cec5SDimitry Andric break;
10580b57cec5SDimitry Andric case Intrinsic::memmove:
10590b57cec5SDimitry Andric case Intrinsic::memcpy:
10600b57cec5SDimitry Andric AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
10610b57cec5SDimitry Andric AccessTy.MemTy = OperandVal->getType();
10620b57cec5SDimitry Andric break;
10635ffd83dbSDimitry Andric case Intrinsic::masked_load:
10645ffd83dbSDimitry Andric AccessTy.AddrSpace =
10655ffd83dbSDimitry Andric II->getArgOperand(0)->getType()->getPointerAddressSpace();
10665ffd83dbSDimitry Andric break;
10675ffd83dbSDimitry Andric case Intrinsic::masked_store:
10685ffd83dbSDimitry Andric AccessTy.AddrSpace =
10695ffd83dbSDimitry Andric II->getArgOperand(1)->getType()->getPointerAddressSpace();
10705ffd83dbSDimitry Andric break;
10710b57cec5SDimitry Andric default: {
10720b57cec5SDimitry Andric MemIntrinsicInfo IntrInfo;
10730b57cec5SDimitry Andric if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
10740b57cec5SDimitry Andric AccessTy.AddrSpace
10750b57cec5SDimitry Andric = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
10760b57cec5SDimitry Andric }
10770b57cec5SDimitry Andric
10780b57cec5SDimitry Andric break;
10790b57cec5SDimitry Andric }
10800b57cec5SDimitry Andric }
10810b57cec5SDimitry Andric }
10820b57cec5SDimitry Andric
10830b57cec5SDimitry Andric return AccessTy;
10840b57cec5SDimitry Andric }
10850b57cec5SDimitry Andric
10860b57cec5SDimitry Andric /// Return true if this AddRec is already a phi in its loop.
10870b57cec5SDimitry Andric static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
10880b57cec5SDimitry Andric for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
10890b57cec5SDimitry Andric if (SE.isSCEVable(PN.getType()) &&
10900b57cec5SDimitry Andric (SE.getEffectiveSCEVType(PN.getType()) ==
10910b57cec5SDimitry Andric SE.getEffectiveSCEVType(AR->getType())) &&
10920b57cec5SDimitry Andric SE.getSCEV(&PN) == AR)
10930b57cec5SDimitry Andric return true;
10940b57cec5SDimitry Andric }
10950b57cec5SDimitry Andric return false;
10960b57cec5SDimitry Andric }
10970b57cec5SDimitry Andric
10980b57cec5SDimitry Andric /// Check if expanding this expression is likely to incur significant cost. This
10990b57cec5SDimitry Andric /// is tricky because SCEV doesn't track which expressions are actually computed
11000b57cec5SDimitry Andric /// by the current IR.
11010b57cec5SDimitry Andric ///
11020b57cec5SDimitry Andric /// We currently allow expansion of IV increments that involve adds,
11030b57cec5SDimitry Andric /// multiplication by constants, and AddRecs from existing phis.
11040b57cec5SDimitry Andric ///
11050b57cec5SDimitry Andric /// TODO: Allow UDivExpr if we can find an existing IV increment that is an
11060b57cec5SDimitry Andric /// obvious multiple of the UDivExpr.
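/// For example, (4 * %x) is considered cheap, while (%x * %y) is only cheap
/// when some existing mul instruction already computes that product; anything
/// involving a udiv is treated as a high-cost expansion.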
11070b57cec5SDimitry Andric static bool isHighCostExpansion(const SCEV *S,
11080b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV*> &Processed,
11090b57cec5SDimitry Andric ScalarEvolution &SE) {
11100b57cec5SDimitry Andric // Zero/One operand expressions
11110b57cec5SDimitry Andric switch (S->getSCEVType()) {
11120b57cec5SDimitry Andric case scUnknown:
11130b57cec5SDimitry Andric case scConstant:
111406c3fb27SDimitry Andric case scVScale:
11150b57cec5SDimitry Andric return false;
11160b57cec5SDimitry Andric case scTruncate:
11170b57cec5SDimitry Andric return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
11180b57cec5SDimitry Andric Processed, SE);
11190b57cec5SDimitry Andric case scZeroExtend:
11200b57cec5SDimitry Andric return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
11210b57cec5SDimitry Andric Processed, SE);
11220b57cec5SDimitry Andric case scSignExtend:
11230b57cec5SDimitry Andric return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
11240b57cec5SDimitry Andric Processed, SE);
1125e8d8bef9SDimitry Andric default:
1126e8d8bef9SDimitry Andric break;
11270b57cec5SDimitry Andric }
11280b57cec5SDimitry Andric
11290b57cec5SDimitry Andric if (!Processed.insert(S).second)
11300b57cec5SDimitry Andric return false;
11310b57cec5SDimitry Andric
11320b57cec5SDimitry Andric if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
11330b57cec5SDimitry Andric for (const SCEV *S : Add->operands()) {
11340b57cec5SDimitry Andric if (isHighCostExpansion(S, Processed, SE))
11350b57cec5SDimitry Andric return true;
11360b57cec5SDimitry Andric }
11370b57cec5SDimitry Andric return false;
11380b57cec5SDimitry Andric }
11390b57cec5SDimitry Andric
11400b57cec5SDimitry Andric if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
11410b57cec5SDimitry Andric if (Mul->getNumOperands() == 2) {
11420b57cec5SDimitry Andric // Multiplication by a constant is ok
11430b57cec5SDimitry Andric if (isa<SCEVConstant>(Mul->getOperand(0)))
11440b57cec5SDimitry Andric return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
11450b57cec5SDimitry Andric
11460b57cec5SDimitry Andric // If we have the value of one operand, check if an existing
11470b57cec5SDimitry Andric // multiplication already generates this expression.
11480b57cec5SDimitry Andric if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
11490b57cec5SDimitry Andric Value *UVal = U->getValue();
11500b57cec5SDimitry Andric for (User *UR : UVal->users()) {
11510b57cec5SDimitry Andric // If U is a constant, it may be used by a ConstantExpr.
11520b57cec5SDimitry Andric Instruction *UI = dyn_cast<Instruction>(UR);
11530b57cec5SDimitry Andric if (UI && UI->getOpcode() == Instruction::Mul &&
11540b57cec5SDimitry Andric SE.isSCEVable(UI->getType())) {
11550b57cec5SDimitry Andric return SE.getSCEV(UI) == Mul;
11560b57cec5SDimitry Andric }
11570b57cec5SDimitry Andric }
11580b57cec5SDimitry Andric }
11590b57cec5SDimitry Andric }
11600b57cec5SDimitry Andric }
11610b57cec5SDimitry Andric
11620b57cec5SDimitry Andric if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
11630b57cec5SDimitry Andric if (isExistingPhi(AR, SE))
11640b57cec5SDimitry Andric return false;
11650b57cec5SDimitry Andric }
11660b57cec5SDimitry Andric
11670b57cec5SDimitry Andric // For now, consider any other type of expression (div/mul/min/max) high cost.
11680b57cec5SDimitry Andric return true;
11690b57cec5SDimitry Andric }
11700b57cec5SDimitry Andric
11710b57cec5SDimitry Andric namespace {
11720b57cec5SDimitry Andric
11730b57cec5SDimitry Andric class LSRUse;
11740b57cec5SDimitry Andric
11750b57cec5SDimitry Andric } // end anonymous namespace
11760b57cec5SDimitry Andric
11770b57cec5SDimitry Andric /// Check if the addressing mode defined by \p F is completely
11780b57cec5SDimitry Andric /// folded in \p LU at isel time.
11790b57cec5SDimitry Andric /// This includes address-mode folding and special icmp tricks.
11800b57cec5SDimitry Andric /// This function returns true if \p LU can accommodate what \p F
11810b57cec5SDimitry Andric /// defines and up to 1 base + 1 scaled + offset.
11820b57cec5SDimitry Andric /// In other words, if \p F has several base registers, this function may
11830b57cec5SDimitry Andric /// still return true. Therefore, users still need to account for
11840b57cec5SDimitry Andric /// additional base registers and/or unfolded offsets to derive an
11850b57cec5SDimitry Andric /// accurate cost model.
11860b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
11870b57cec5SDimitry Andric const LSRUse &LU, const Formula &F);
11880b57cec5SDimitry Andric
11890b57cec5SDimitry Andric // Get the cost of the scaling factor used in F for LU.
1190fe6060f1SDimitry Andric static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
11910b57cec5SDimitry Andric const LSRUse &LU, const Formula &F,
11920b57cec5SDimitry Andric const Loop &L);
11930b57cec5SDimitry Andric
11940b57cec5SDimitry Andric namespace {
11950b57cec5SDimitry Andric
11960b57cec5SDimitry Andric /// This class is used to measure and compare candidate formulae.
11970b57cec5SDimitry Andric class Cost {
11980b57cec5SDimitry Andric const Loop *L = nullptr;
11990b57cec5SDimitry Andric ScalarEvolution *SE = nullptr;
12000b57cec5SDimitry Andric const TargetTransformInfo *TTI = nullptr;
12010b57cec5SDimitry Andric TargetTransformInfo::LSRCost C;
1202fe6060f1SDimitry Andric TTI::AddressingModeKind AMK = TTI::AMK_None;
12030b57cec5SDimitry Andric
12040b57cec5SDimitry Andric public:
12050b57cec5SDimitry Andric Cost() = delete;
1206fe6060f1SDimitry Andric Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
1207fe6060f1SDimitry Andric TTI::AddressingModeKind AMK) :
1208fe6060f1SDimitry Andric L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
12090b57cec5SDimitry Andric C.Insns = 0;
12100b57cec5SDimitry Andric C.NumRegs = 0;
12110b57cec5SDimitry Andric C.AddRecCost = 0;
12120b57cec5SDimitry Andric C.NumIVMuls = 0;
12130b57cec5SDimitry Andric C.NumBaseAdds = 0;
12140b57cec5SDimitry Andric C.ImmCost = 0;
12150b57cec5SDimitry Andric C.SetupCost = 0;
12160b57cec5SDimitry Andric C.ScaleCost = 0;
12170b57cec5SDimitry Andric }
12180b57cec5SDimitry Andric
1219bdd1243dSDimitry Andric bool isLess(const Cost &Other) const;
12200b57cec5SDimitry Andric
12210b57cec5SDimitry Andric void Lose();
12220b57cec5SDimitry Andric
12230b57cec5SDimitry Andric #ifndef NDEBUG
12240b57cec5SDimitry Andric // Once any of the metrics loses, they must all remain losers.
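// Lose() saturates every metric to the all-ones value, so a cost is valid
// only while either no metric is saturated (the OR is missing some bit) or
// every metric is saturated (the AND is all-ones).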
12250b57cec5SDimitry Andric bool isValid() {
12260b57cec5SDimitry Andric return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
12270b57cec5SDimitry Andric | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
12280b57cec5SDimitry Andric || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
12290b57cec5SDimitry Andric & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
12300b57cec5SDimitry Andric }
12310b57cec5SDimitry Andric #endif
12320b57cec5SDimitry Andric
12330b57cec5SDimitry Andric bool isLoser() {
12340b57cec5SDimitry Andric assert(isValid() && "invalid cost");
12350b57cec5SDimitry Andric return C.NumRegs == ~0u;
12360b57cec5SDimitry Andric }
12370b57cec5SDimitry Andric
12380b57cec5SDimitry Andric void RateFormula(const Formula &F,
12390b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs,
12400b57cec5SDimitry Andric const DenseSet<const SCEV *> &VisitedRegs,
12410b57cec5SDimitry Andric const LSRUse &LU,
12420b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
12430b57cec5SDimitry Andric
12440b57cec5SDimitry Andric void print(raw_ostream &OS) const;
12450b57cec5SDimitry Andric void dump() const;
12460b57cec5SDimitry Andric
12470b57cec5SDimitry Andric private:
12480b57cec5SDimitry Andric void RateRegister(const Formula &F, const SCEV *Reg,
12490b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs);
12500b57cec5SDimitry Andric void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
12510b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs,
12520b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> *LoserRegs);
12530b57cec5SDimitry Andric };
12540b57cec5SDimitry Andric
12550b57cec5SDimitry Andric /// An operand value in an instruction which is to be replaced with some
12560b57cec5SDimitry Andric /// equivalent, possibly strength-reduced, replacement.
12570b57cec5SDimitry Andric struct LSRFixup {
12580b57cec5SDimitry Andric /// The instruction which will be updated.
12590b57cec5SDimitry Andric Instruction *UserInst = nullptr;
12600b57cec5SDimitry Andric
12610b57cec5SDimitry Andric /// The operand of the instruction which will be replaced. The operand may be
12620b57cec5SDimitry Andric /// used more than once; every instance will be replaced.
12630b57cec5SDimitry Andric Value *OperandValToReplace = nullptr;
12640b57cec5SDimitry Andric
12650b57cec5SDimitry Andric /// If this user is to use the post-incremented value of an induction
12660b57cec5SDimitry Andric /// variable, this set is non-empty and holds the loops associated with the
12670b57cec5SDimitry Andric /// induction variable.
12680b57cec5SDimitry Andric PostIncLoopSet PostIncLoops;
12690b57cec5SDimitry Andric
12700b57cec5SDimitry Andric /// A constant offset to be added to the LSRUse expression. This allows
12710b57cec5SDimitry Andric /// multiple fixups to share the same LSRUse with different offsets, for
12720b57cec5SDimitry Andric /// example in an unrolled loop.
12730fca6ea1SDimitry Andric Immediate Offset = Immediate::getZero();
12740b57cec5SDimitry Andric
12750b57cec5SDimitry Andric LSRFixup() = default;
12760b57cec5SDimitry Andric
12770b57cec5SDimitry Andric bool isUseFullyOutsideLoop(const Loop *L) const;
12780b57cec5SDimitry Andric
12790b57cec5SDimitry Andric void print(raw_ostream &OS) const;
12800b57cec5SDimitry Andric void dump() const;
12810b57cec5SDimitry Andric };
12820b57cec5SDimitry Andric
12830b57cec5SDimitry Andric /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
12840b57cec5SDimitry Andric /// SmallVectors of const SCEV*.
12850b57cec5SDimitry Andric struct UniquifierDenseMapInfo {
12860b57cec5SDimitry Andric static SmallVector<const SCEV *, 4> getEmptyKey() {
12870b57cec5SDimitry Andric SmallVector<const SCEV *, 4> V;
12880b57cec5SDimitry Andric V.push_back(reinterpret_cast<const SCEV *>(-1));
12890b57cec5SDimitry Andric return V;
12900b57cec5SDimitry Andric }
12910b57cec5SDimitry Andric
12920b57cec5SDimitry Andric static SmallVector<const SCEV *, 4> getTombstoneKey() {
12930b57cec5SDimitry Andric SmallVector<const SCEV *, 4> V;
12940b57cec5SDimitry Andric V.push_back(reinterpret_cast<const SCEV *>(-2));
12950b57cec5SDimitry Andric return V;
12960b57cec5SDimitry Andric }
12970b57cec5SDimitry Andric
12980b57cec5SDimitry Andric static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
12990b57cec5SDimitry Andric return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
13000b57cec5SDimitry Andric }
13010b57cec5SDimitry Andric
13020b57cec5SDimitry Andric static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
13030b57cec5SDimitry Andric const SmallVector<const SCEV *, 4> &RHS) {
13040b57cec5SDimitry Andric return LHS == RHS;
13050b57cec5SDimitry Andric }
13060b57cec5SDimitry Andric };
13070b57cec5SDimitry Andric
13080b57cec5SDimitry Andric /// This class holds the state that LSR keeps for each use in IVUsers, as well
13090b57cec5SDimitry Andric /// as uses invented by LSR itself. It includes information about what kinds of
13100b57cec5SDimitry Andric /// things can be folded into the user, information about the user itself, and
13110b57cec5SDimitry Andric /// information about how the use may be satisfied. TODO: Represent multiple
13120b57cec5SDimitry Andric /// users of the same expression in common?
13130b57cec5SDimitry Andric class LSRUse {
13140b57cec5SDimitry Andric DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
13150b57cec5SDimitry Andric
13160b57cec5SDimitry Andric public:
13170b57cec5SDimitry Andric /// An enum for a kind of use, indicating what types of scaled and immediate
13180b57cec5SDimitry Andric /// operands it might support.
13190b57cec5SDimitry Andric enum KindType {
13200b57cec5SDimitry Andric Basic, ///< A normal use, with no folding.
13210b57cec5SDimitry Andric Special, ///< A special case of basic, allowing -1 scales.
13220b57cec5SDimitry Andric Address, ///< An address use; folding according to TargetLowering
13230b57cec5SDimitry Andric ICmpZero ///< An equality icmp with both operands folded into one.
13240b57cec5SDimitry Andric // TODO: Add a generic icmp too?
13250b57cec5SDimitry Andric };
13260b57cec5SDimitry Andric
13270b57cec5SDimitry Andric using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
13280b57cec5SDimitry Andric
13290b57cec5SDimitry Andric KindType Kind;
13300b57cec5SDimitry Andric MemAccessTy AccessTy;
13310b57cec5SDimitry Andric
13320b57cec5SDimitry Andric /// The list of operands which are to be replaced.
13330b57cec5SDimitry Andric SmallVector<LSRFixup, 8> Fixups;
13340b57cec5SDimitry Andric
13350b57cec5SDimitry Andric /// Keep track of the min and max offsets of the fixups.
13360fca6ea1SDimitry Andric Immediate MinOffset = Immediate::getFixedMax();
13370fca6ea1SDimitry Andric Immediate MaxOffset = Immediate::getFixedMin();
13380b57cec5SDimitry Andric
13390b57cec5SDimitry Andric /// This records whether all of the fixups using this LSRUse are outside of
13400b57cec5SDimitry Andric /// the loop, in which case some special-case heuristics may be used.
13410b57cec5SDimitry Andric bool AllFixupsOutsideLoop = true;
13420b57cec5SDimitry Andric
13430b57cec5SDimitry Andric /// RigidFormula is set to true to guarantee that this use will be associated
13440b57cec5SDimitry Andric /// with a single formula--the one that initially matched. Some SCEV
13450b57cec5SDimitry Andric /// expressions cannot be expanded. This allows LSR to consider the registers
13460b57cec5SDimitry Andric /// used by those expressions without the need to expand them later after
13470b57cec5SDimitry Andric /// changing the formula.
13480b57cec5SDimitry Andric bool RigidFormula = false;
13490b57cec5SDimitry Andric
13500b57cec5SDimitry Andric /// This records the widest use type for any fixup using this
13510b57cec5SDimitry Andric /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
13520b57cec5SDimitry Andric /// fixup widths to be equivalent, because the narrower one may be relying on
13530b57cec5SDimitry Andric /// the implicit truncation to truncate away bogus bits.
13540b57cec5SDimitry Andric Type *WidestFixupType = nullptr;
13550b57cec5SDimitry Andric
13560b57cec5SDimitry Andric /// A list of ways to build a value that can satisfy this user. After the
13570b57cec5SDimitry Andric /// list is populated, one of these is selected heuristically and used to
13580b57cec5SDimitry Andric /// formulate a replacement for OperandValToReplace in UserInst.
13590b57cec5SDimitry Andric SmallVector<Formula, 12> Formulae;
13600b57cec5SDimitry Andric
13610b57cec5SDimitry Andric /// The set of register candidates used by all formulae in this LSRUse.
13620b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 4> Regs;
13630b57cec5SDimitry Andric
13640b57cec5SDimitry Andric LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
13650b57cec5SDimitry Andric
13660b57cec5SDimitry Andric LSRFixup &getNewFixup() {
13670b57cec5SDimitry Andric Fixups.push_back(LSRFixup());
13680b57cec5SDimitry Andric return Fixups.back();
13690b57cec5SDimitry Andric }
13700b57cec5SDimitry Andric
13710b57cec5SDimitry Andric void pushFixup(LSRFixup &f) {
13720b57cec5SDimitry Andric Fixups.push_back(f);
13730fca6ea1SDimitry Andric if (Immediate::isKnownGT(f.Offset, MaxOffset))
13740b57cec5SDimitry Andric MaxOffset = f.Offset;
13750fca6ea1SDimitry Andric if (Immediate::isKnownLT(f.Offset, MinOffset))
13760b57cec5SDimitry Andric MinOffset = f.Offset;
13770b57cec5SDimitry Andric }
13780b57cec5SDimitry Andric
13790b57cec5SDimitry Andric bool HasFormulaWithSameRegs(const Formula &F) const;
13800b57cec5SDimitry Andric float getNotSelectedProbability(const SCEV *Reg) const;
13810b57cec5SDimitry Andric bool InsertFormula(const Formula &F, const Loop &L);
13820b57cec5SDimitry Andric void DeleteFormula(Formula &F);
13830b57cec5SDimitry Andric void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
13840b57cec5SDimitry Andric
13850b57cec5SDimitry Andric void print(raw_ostream &OS) const;
13860b57cec5SDimitry Andric void dump() const;
13870b57cec5SDimitry Andric };
13880b57cec5SDimitry Andric
13890b57cec5SDimitry Andric } // end anonymous namespace
13900b57cec5SDimitry Andric
13910b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
13920b57cec5SDimitry Andric LSRUse::KindType Kind, MemAccessTy AccessTy,
13930fca6ea1SDimitry Andric GlobalValue *BaseGV, Immediate BaseOffset,
13940b57cec5SDimitry Andric bool HasBaseReg, int64_t Scale,
13950b57cec5SDimitry Andric Instruction *Fixup = nullptr);
13960b57cec5SDimitry Andric
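// Estimate (depth-limited) how many preheader setup instructions a register's
// expression would need. For example, with {(%a + %b),+,1}<%L> the start
// value contributes 1 for each of %a and %b, for a setup cost of 2.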
13970b57cec5SDimitry Andric static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
13980b57cec5SDimitry Andric if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
13990b57cec5SDimitry Andric return 1;
14000b57cec5SDimitry Andric if (Depth == 0)
14010b57cec5SDimitry Andric return 0;
14020b57cec5SDimitry Andric if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
14030b57cec5SDimitry Andric return getSetupCost(S->getStart(), Depth - 1);
1404e8d8bef9SDimitry Andric if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
14050b57cec5SDimitry Andric return getSetupCost(S->getOperand(), Depth - 1);
14060b57cec5SDimitry Andric if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
1407bdd1243dSDimitry Andric return std::accumulate(S->operands().begin(), S->operands().end(), 0,
14080b57cec5SDimitry Andric [&](unsigned i, const SCEV *Reg) {
14090b57cec5SDimitry Andric return i + getSetupCost(Reg, Depth - 1);
14100b57cec5SDimitry Andric });
14110b57cec5SDimitry Andric if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
14120b57cec5SDimitry Andric return getSetupCost(S->getLHS(), Depth - 1) +
14130b57cec5SDimitry Andric getSetupCost(S->getRHS(), Depth - 1);
14140b57cec5SDimitry Andric return 0;
14150b57cec5SDimitry Andric }
14160b57cec5SDimitry Andric
14170b57cec5SDimitry Andric /// Tally up interesting quantities from the given register.
14180b57cec5SDimitry Andric void Cost::RateRegister(const Formula &F, const SCEV *Reg,
14190b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs) {
14200b57cec5SDimitry Andric if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
14210b57cec5SDimitry Andric // If this is an addrec for another loop, it should be an invariant
14220b57cec5SDimitry Andric // with respect to L since L is the innermost loop (at least
14230b57cec5SDimitry Andric // for now LSR only handles innermost loops).
14240b57cec5SDimitry Andric if (AR->getLoop() != L) {
14250b57cec5SDimitry Andric // If the AddRec already exists as a phi, consider its register free and leave it alone.
1426fe6060f1SDimitry Andric if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
14270b57cec5SDimitry Andric return;
14280b57cec5SDimitry Andric
14290b57cec5SDimitry Andric // It is bad to allow LSR for the current loop to add induction variables
14300b57cec5SDimitry Andric // for its sibling loops.
14310b57cec5SDimitry Andric if (!AR->getLoop()->contains(L)) {
14320b57cec5SDimitry Andric Lose();
14330b57cec5SDimitry Andric return;
14340b57cec5SDimitry Andric }
14350b57cec5SDimitry Andric
14360b57cec5SDimitry Andric // Otherwise, it will be an invariant with respect to Loop L.
14370b57cec5SDimitry Andric ++C.NumRegs;
14380b57cec5SDimitry Andric return;
14390b57cec5SDimitry Andric }
14400b57cec5SDimitry Andric
14410b57cec5SDimitry Andric unsigned LoopCost = 1;
14420b57cec5SDimitry Andric if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
14430b57cec5SDimitry Andric TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
14440b57cec5SDimitry Andric
14450b57cec5SDimitry Andric // If the step size matches the base offset, we could use pre-indexed
14460b57cec5SDimitry Andric // addressing.
14470fca6ea1SDimitry Andric if (AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed()) {
14480b57cec5SDimitry Andric if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
14490fca6ea1SDimitry Andric if (Step->getAPInt() == F.BaseOffset.getFixedValue())
14500b57cec5SDimitry Andric LoopCost = 0;
1451fe6060f1SDimitry Andric } else if (AMK == TTI::AMK_PostIndexed) {
14520b57cec5SDimitry Andric const SCEV *LoopStep = AR->getStepRecurrence(*SE);
14530b57cec5SDimitry Andric if (isa<SCEVConstant>(LoopStep)) {
14540b57cec5SDimitry Andric const SCEV *LoopStart = AR->getStart();
14550b57cec5SDimitry Andric if (!isa<SCEVConstant>(LoopStart) &&
14560b57cec5SDimitry Andric SE->isLoopInvariant(LoopStart, L))
14570b57cec5SDimitry Andric LoopCost = 0;
14580b57cec5SDimitry Andric }
14590b57cec5SDimitry Andric }
14600b57cec5SDimitry Andric }
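// For example, with post-indexed addressing available, {%base,+,8}<%L> has a
// constant step and a loop-invariant, non-constant start, so the increment is
// expected to fold into the memory access and the addrec adds no loop cost.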
14610b57cec5SDimitry Andric C.AddRecCost += LoopCost;
14620b57cec5SDimitry Andric
14630b57cec5SDimitry Andric // Add the step value register, if it needs one.
14640b57cec5SDimitry Andric // TODO: The non-affine case isn't precisely modeled here.
14650b57cec5SDimitry Andric if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
14660b57cec5SDimitry Andric if (!Regs.count(AR->getOperand(1))) {
14670b57cec5SDimitry Andric RateRegister(F, AR->getOperand(1), Regs);
14680b57cec5SDimitry Andric if (isLoser())
14690b57cec5SDimitry Andric return;
14700b57cec5SDimitry Andric }
14710b57cec5SDimitry Andric }
14720b57cec5SDimitry Andric }
14730b57cec5SDimitry Andric ++C.NumRegs;
14740b57cec5SDimitry Andric
14750b57cec5SDimitry Andric // Rough heuristic; favor registers which don't require extra setup
14760b57cec5SDimitry Andric // instructions in the preheader.
14770b57cec5SDimitry Andric C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
14780b57cec5SDimitry Andric // Ensure we don't, even with the recursion limit, produce invalid costs.
14790b57cec5SDimitry Andric C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
14800b57cec5SDimitry Andric
14810b57cec5SDimitry Andric C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
14820b57cec5SDimitry Andric SE->hasComputableLoopEvolution(Reg, L);
14830b57cec5SDimitry Andric }
14840b57cec5SDimitry Andric
14850b57cec5SDimitry Andric /// Record this register in the set. If we haven't seen it before, rate
14860b57cec5SDimitry Andric /// it. Optional LoserRegs provides a way to declare any formula that refers to
14870b57cec5SDimitry Andric /// one of those regs an instant loser.
14880b57cec5SDimitry Andric void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
14890b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs,
14900b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> *LoserRegs) {
14910b57cec5SDimitry Andric if (LoserRegs && LoserRegs->count(Reg)) {
14920b57cec5SDimitry Andric Lose();
14930b57cec5SDimitry Andric return;
14940b57cec5SDimitry Andric }
14950b57cec5SDimitry Andric if (Regs.insert(Reg).second) {
14960b57cec5SDimitry Andric RateRegister(F, Reg, Regs);
14970b57cec5SDimitry Andric if (LoserRegs && isLoser())
14980b57cec5SDimitry Andric LoserRegs->insert(Reg);
14990b57cec5SDimitry Andric }
15000b57cec5SDimitry Andric }
15010b57cec5SDimitry Andric
15020b57cec5SDimitry Andric void Cost::RateFormula(const Formula &F,
15030b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs,
15040b57cec5SDimitry Andric const DenseSet<const SCEV *> &VisitedRegs,
15050b57cec5SDimitry Andric const LSRUse &LU,
15060b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> *LoserRegs) {
150781ad6265SDimitry Andric if (isLoser())
150881ad6265SDimitry Andric return;
15090b57cec5SDimitry Andric assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
15100b57cec5SDimitry Andric // Tally up the registers.
15110b57cec5SDimitry Andric unsigned PrevAddRecCost = C.AddRecCost;
15120b57cec5SDimitry Andric unsigned PrevNumRegs = C.NumRegs;
15130b57cec5SDimitry Andric unsigned PrevNumBaseAdds = C.NumBaseAdds;
15140b57cec5SDimitry Andric if (const SCEV *ScaledReg = F.ScaledReg) {
15150b57cec5SDimitry Andric if (VisitedRegs.count(ScaledReg)) {
15160b57cec5SDimitry Andric Lose();
15170b57cec5SDimitry Andric return;
15180b57cec5SDimitry Andric }
15190b57cec5SDimitry Andric RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
15200b57cec5SDimitry Andric if (isLoser())
15210b57cec5SDimitry Andric return;
15220b57cec5SDimitry Andric }
15230b57cec5SDimitry Andric for (const SCEV *BaseReg : F.BaseRegs) {
15240b57cec5SDimitry Andric if (VisitedRegs.count(BaseReg)) {
15250b57cec5SDimitry Andric Lose();
15260b57cec5SDimitry Andric return;
15270b57cec5SDimitry Andric }
15280b57cec5SDimitry Andric RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
15290b57cec5SDimitry Andric if (isLoser())
15300b57cec5SDimitry Andric return;
15310b57cec5SDimitry Andric }
15320b57cec5SDimitry Andric
15330b57cec5SDimitry Andric // Determine how many (unfolded) adds we'll need inside the loop.
15340b57cec5SDimitry Andric size_t NumBaseParts = F.getNumRegs();
15350b57cec5SDimitry Andric if (NumBaseParts > 1)
15360b57cec5SDimitry Andric // Do not count the base and a possible second register if the target
15370b57cec5SDimitry Andric // allows folding two registers.
15380b57cec5SDimitry Andric C.NumBaseAdds +=
15390b57cec5SDimitry Andric NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
15400fca6ea1SDimitry Andric C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
15410b57cec5SDimitry Andric
15420b57cec5SDimitry Andric // Accumulate non-free scaling amounts.
1543fe6060f1SDimitry Andric C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();
15440b57cec5SDimitry Andric
15450b57cec5SDimitry Andric // Tally up the non-zero immediates.
15460b57cec5SDimitry Andric for (const LSRFixup &Fixup : LU.Fixups) {
15470fca6ea1SDimitry Andric if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
15480fca6ea1SDimitry Andric Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
15490b57cec5SDimitry Andric if (F.BaseGV)
15500b57cec5SDimitry Andric C.ImmCost += 64; // Handle symbolic values conservatively.
15510b57cec5SDimitry Andric // TODO: This should probably be the pointer size.
15520fca6ea1SDimitry Andric else if (Offset.isNonZero())
15530fca6ea1SDimitry Andric C.ImmCost +=
15540fca6ea1SDimitry Andric APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
15550b57cec5SDimitry Andric
15560b57cec5SDimitry Andric // Check with the target whether this offset with this instruction is
15570b57cec5SDimitry Andric // specifically not supported.
15580fca6ea1SDimitry Andric if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
15590b57cec5SDimitry Andric !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
15600b57cec5SDimitry Andric Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
15610b57cec5SDimitry Andric C.NumBaseAdds++;
15620fca6ea1SDimitry Andric } else {
15630fca6ea1SDimitry Andric // Incompatible immediate types; increase the cost to avoid using this formula.
15640fca6ea1SDimitry Andric C.ImmCost += 2048;
15650fca6ea1SDimitry Andric }
15660b57cec5SDimitry Andric }
15670b57cec5SDimitry Andric
15680b57cec5SDimitry Andric // If we don't count instruction cost, exit here.
15690b57cec5SDimitry Andric if (!InsnsCost) {
15700b57cec5SDimitry Andric assert(isValid() && "invalid cost");
15710b57cec5SDimitry Andric return;
15720b57cec5SDimitry Andric }
15730b57cec5SDimitry Andric
15740b57cec5SDimitry Andric // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as an
15750b57cec5SDimitry Andric // additional instruction (at least a fill).
15768bcb0991SDimitry Andric // TODO: Do we need to distinguish between register classes?
15778bcb0991SDimitry Andric unsigned TTIRegNum = TTI->getNumberOfRegisters(
15788bcb0991SDimitry Andric TTI->getRegisterClassForType(false, F.getType())) - 1;
15790b57cec5SDimitry Andric if (C.NumRegs > TTIRegNum) {
15800b57cec5SDimitry Andric // The cost already exceeded TTIRegNum, so only newly added registers can
15810b57cec5SDimitry Andric // add new instructions.
15820b57cec5SDimitry Andric if (PrevNumRegs > TTIRegNum)
15830b57cec5SDimitry Andric C.Insns += (C.NumRegs - PrevNumRegs);
15840b57cec5SDimitry Andric else
15850b57cec5SDimitry Andric C.Insns += (C.NumRegs - TTIRegNum);
15860b57cec5SDimitry Andric }
15870b57cec5SDimitry Andric
15880b57cec5SDimitry Andric // If an ICmpZero formula does not end in 0, it cannot be replaced by just an
15890b57cec5SDimitry Andric // add or sub. We'll need to compare the final result of the AddRec.
15900b57cec5SDimitry Andric // That means we'll need an additional instruction. But if the target can
15910b57cec5SDimitry Andric // macro-fuse a compare with a branch, don't count this extra instruction.
15920b57cec5SDimitry Andric // For -10 + {0, +, 1}:
15930b57cec5SDimitry Andric // i = i + 1;
15940b57cec5SDimitry Andric // cmp i, 10
15950b57cec5SDimitry Andric //
15960b57cec5SDimitry Andric // For {-10, +, 1}:
15970b57cec5SDimitry Andric // i = i + 1;
15980b57cec5SDimitry Andric if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
15990b57cec5SDimitry Andric !TTI->canMacroFuseCmp())
16000b57cec5SDimitry Andric C.Insns++;
16010b57cec5SDimitry Andric // Each new AddRec adds 1 instruction to the calculation.
16020b57cec5SDimitry Andric C.Insns += (C.AddRecCost - PrevAddRecCost);
16030b57cec5SDimitry Andric
16040b57cec5SDimitry Andric // BaseAdds adds instructions for unfolded registers.
16050b57cec5SDimitry Andric if (LU.Kind != LSRUse::ICmpZero)
16060b57cec5SDimitry Andric C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
16070b57cec5SDimitry Andric assert(isValid() && "invalid cost");
16080b57cec5SDimitry Andric }
16090b57cec5SDimitry Andric
16100b57cec5SDimitry Andric /// Set this cost to a losing value.
16110b57cec5SDimitry Andric void Cost::Lose() {
16120b57cec5SDimitry Andric C.Insns = std::numeric_limits<unsigned>::max();
16130b57cec5SDimitry Andric C.NumRegs = std::numeric_limits<unsigned>::max();
16140b57cec5SDimitry Andric C.AddRecCost = std::numeric_limits<unsigned>::max();
16150b57cec5SDimitry Andric C.NumIVMuls = std::numeric_limits<unsigned>::max();
16160b57cec5SDimitry Andric C.NumBaseAdds = std::numeric_limits<unsigned>::max();
16170b57cec5SDimitry Andric C.ImmCost = std::numeric_limits<unsigned>::max();
16180b57cec5SDimitry Andric C.SetupCost = std::numeric_limits<unsigned>::max();
16190b57cec5SDimitry Andric C.ScaleCost = std::numeric_limits<unsigned>::max();
16200b57cec5SDimitry Andric }
16210b57cec5SDimitry Andric
16220b57cec5SDimitry Andric /// Choose the lower cost.
1623bdd1243dSDimitry Andric bool Cost::isLess(const Cost &Other) const {
16240b57cec5SDimitry Andric if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
16250b57cec5SDimitry Andric C.Insns != Other.C.Insns)
16260b57cec5SDimitry Andric return C.Insns < Other.C.Insns;
16270b57cec5SDimitry Andric return TTI->isLSRCostLess(C, Other.C);
16280b57cec5SDimitry Andric }
16290b57cec5SDimitry Andric
16300b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16310b57cec5SDimitry Andric void Cost::print(raw_ostream &OS) const {
16320b57cec5SDimitry Andric if (InsnsCost)
16330b57cec5SDimitry Andric OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
16340b57cec5SDimitry Andric OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
16350b57cec5SDimitry Andric if (C.AddRecCost != 0)
16360b57cec5SDimitry Andric OS << ", with addrec cost " << C.AddRecCost;
16370b57cec5SDimitry Andric if (C.NumIVMuls != 0)
16380b57cec5SDimitry Andric OS << ", plus " << C.NumIVMuls << " IV mul"
16390b57cec5SDimitry Andric << (C.NumIVMuls == 1 ? "" : "s");
16400b57cec5SDimitry Andric if (C.NumBaseAdds != 0)
16410b57cec5SDimitry Andric OS << ", plus " << C.NumBaseAdds << " base add"
16420b57cec5SDimitry Andric << (C.NumBaseAdds == 1 ? "" : "s");
16430b57cec5SDimitry Andric if (C.ScaleCost != 0)
16440b57cec5SDimitry Andric OS << ", plus " << C.ScaleCost << " scale cost";
16450b57cec5SDimitry Andric if (C.ImmCost != 0)
16460b57cec5SDimitry Andric OS << ", plus " << C.ImmCost << " imm cost";
16470b57cec5SDimitry Andric if (C.SetupCost != 0)
16480b57cec5SDimitry Andric OS << ", plus " << C.SetupCost << " setup cost";
16490b57cec5SDimitry Andric }
16500b57cec5SDimitry Andric
16510b57cec5SDimitry Andric LLVM_DUMP_METHOD void Cost::dump() const {
16520b57cec5SDimitry Andric print(errs()); errs() << '\n';
16530b57cec5SDimitry Andric }
16540b57cec5SDimitry Andric #endif
16550b57cec5SDimitry Andric
16560b57cec5SDimitry Andric /// Test whether this fixup always uses its value outside of the given loop.
16570b57cec5SDimitry Andric bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
16580b57cec5SDimitry Andric // PHI nodes use their value in their incoming blocks.
16590b57cec5SDimitry Andric if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
16600b57cec5SDimitry Andric for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
16610b57cec5SDimitry Andric if (PN->getIncomingValue(i) == OperandValToReplace &&
16620b57cec5SDimitry Andric L->contains(PN->getIncomingBlock(i)))
16630b57cec5SDimitry Andric return false;
16640b57cec5SDimitry Andric return true;
16650b57cec5SDimitry Andric }
16660b57cec5SDimitry Andric
16670b57cec5SDimitry Andric return !L->contains(UserInst);
16680b57cec5SDimitry Andric }
16690b57cec5SDimitry Andric
16700b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16710b57cec5SDimitry Andric void LSRFixup::print(raw_ostream &OS) const {
16720b57cec5SDimitry Andric OS << "UserInst=";
16730b57cec5SDimitry Andric // Store is common and interesting enough to be worth special-casing.
16740b57cec5SDimitry Andric if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
16750b57cec5SDimitry Andric OS << "store ";
16760b57cec5SDimitry Andric Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
16770b57cec5SDimitry Andric } else if (UserInst->getType()->isVoidTy())
16780b57cec5SDimitry Andric OS << UserInst->getOpcodeName();
16790b57cec5SDimitry Andric else
16800b57cec5SDimitry Andric UserInst->printAsOperand(OS, /*PrintType=*/false);
16810b57cec5SDimitry Andric
16820b57cec5SDimitry Andric OS << ", OperandValToReplace=";
16830b57cec5SDimitry Andric OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
16840b57cec5SDimitry Andric
16850b57cec5SDimitry Andric for (const Loop *PIL : PostIncLoops) {
16860b57cec5SDimitry Andric OS << ", PostIncLoop=";
16870b57cec5SDimitry Andric PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
16880b57cec5SDimitry Andric }
16890b57cec5SDimitry Andric
16900fca6ea1SDimitry Andric if (Offset.isNonZero())
16910b57cec5SDimitry Andric OS << ", Offset=" << Offset;
16920b57cec5SDimitry Andric }
16930b57cec5SDimitry Andric
16940b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRFixup::dump() const {
16950b57cec5SDimitry Andric print(errs()); errs() << '\n';
16960b57cec5SDimitry Andric }
16970b57cec5SDimitry Andric #endif
16980b57cec5SDimitry Andric
16990b57cec5SDimitry Andric /// Test whether this use has a formula with the same registers as the given
17000b57cec5SDimitry Andric /// formula.
17010b57cec5SDimitry Andric bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
17020b57cec5SDimitry Andric SmallVector<const SCEV *, 4> Key = F.BaseRegs;
17030b57cec5SDimitry Andric if (F.ScaledReg) Key.push_back(F.ScaledReg);
17040b57cec5SDimitry Andric // Unstable sort by host order ok, because this is only used for uniquifying.
17050b57cec5SDimitry Andric llvm::sort(Key);
17060b57cec5SDimitry Andric return Uniquifier.count(Key);
17070b57cec5SDimitry Andric }
17080b57cec5SDimitry Andric
17090b57cec5SDimitry Andric /// Return the probability of selecting a formula that does not reference Reg.
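/// For example, if 2 of 8 formulae reference Reg, the result is 6/8 = 0.75.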
17100b57cec5SDimitry Andric float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
17110b57cec5SDimitry Andric unsigned FNum = 0;
17120b57cec5SDimitry Andric for (const Formula &F : Formulae)
17130b57cec5SDimitry Andric if (F.referencesReg(Reg))
17140b57cec5SDimitry Andric FNum++;
17150b57cec5SDimitry Andric return ((float)(Formulae.size() - FNum)) / Formulae.size();
17160b57cec5SDimitry Andric }
17170b57cec5SDimitry Andric
17180b57cec5SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
17190b57cec5SDimitry Andric /// return true. Return false otherwise. The formula must be in canonical form.
17200b57cec5SDimitry Andric bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
17210b57cec5SDimitry Andric assert(F.isCanonical(L) && "Invalid canonical representation");
17220b57cec5SDimitry Andric
17230b57cec5SDimitry Andric if (!Formulae.empty() && RigidFormula)
17240b57cec5SDimitry Andric return false;
17250b57cec5SDimitry Andric
17260b57cec5SDimitry Andric SmallVector<const SCEV *, 4> Key = F.BaseRegs;
17270b57cec5SDimitry Andric if (F.ScaledReg) Key.push_back(F.ScaledReg);
17280b57cec5SDimitry Andric // Unstable sort by host order ok, because this is only used for uniquifying.
17290b57cec5SDimitry Andric llvm::sort(Key);
17300b57cec5SDimitry Andric
17310b57cec5SDimitry Andric if (!Uniquifier.insert(Key).second)
17320b57cec5SDimitry Andric return false;
17330b57cec5SDimitry Andric
17340b57cec5SDimitry Andric // Using a register to hold the value of 0 is not profitable.
17350b57cec5SDimitry Andric assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
17360b57cec5SDimitry Andric "Zero allocated in a scaled register!");
17370b57cec5SDimitry Andric #ifndef NDEBUG
17380b57cec5SDimitry Andric for (const SCEV *BaseReg : F.BaseRegs)
17390b57cec5SDimitry Andric assert(!BaseReg->isZero() && "Zero allocated in a base register!");
17400b57cec5SDimitry Andric #endif
17410b57cec5SDimitry Andric
17420b57cec5SDimitry Andric // Add the formula to the list.
17430b57cec5SDimitry Andric Formulae.push_back(F);
17440b57cec5SDimitry Andric
17450b57cec5SDimitry Andric // Record registers now being used by this use.
17460b57cec5SDimitry Andric Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
17470b57cec5SDimitry Andric if (F.ScaledReg)
17480b57cec5SDimitry Andric Regs.insert(F.ScaledReg);
17490b57cec5SDimitry Andric
17500b57cec5SDimitry Andric return true;
17510b57cec5SDimitry Andric }
17520b57cec5SDimitry Andric
17530b57cec5SDimitry Andric /// Remove the given formula from this use's list.
17540b57cec5SDimitry Andric void LSRUse::DeleteFormula(Formula &F) {
17550b57cec5SDimitry Andric if (&F != &Formulae.back())
17560b57cec5SDimitry Andric std::swap(F, Formulae.back());
17570b57cec5SDimitry Andric Formulae.pop_back();
17580b57cec5SDimitry Andric }
17590b57cec5SDimitry Andric
17600b57cec5SDimitry Andric /// Recompute the Regs field, and update RegUses.
17610b57cec5SDimitry Andric void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
17620b57cec5SDimitry Andric // Now that we've filtered out some formulae, recompute the Regs set.
17630b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
17640b57cec5SDimitry Andric Regs.clear();
17650b57cec5SDimitry Andric for (const Formula &F : Formulae) {
17660b57cec5SDimitry Andric if (F.ScaledReg) Regs.insert(F.ScaledReg);
17670b57cec5SDimitry Andric Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
17680b57cec5SDimitry Andric }
17690b57cec5SDimitry Andric
17700b57cec5SDimitry Andric // Update the RegTracker.
17710b57cec5SDimitry Andric for (const SCEV *S : OldRegs)
17720b57cec5SDimitry Andric if (!Regs.count(S))
17730b57cec5SDimitry Andric RegUses.dropRegister(S, LUIdx);
17740b57cec5SDimitry Andric }
17750b57cec5SDimitry Andric
17760b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
17770b57cec5SDimitry Andric void LSRUse::print(raw_ostream &OS) const {
17780b57cec5SDimitry Andric OS << "LSR Use: Kind=";
17790b57cec5SDimitry Andric switch (Kind) {
17800b57cec5SDimitry Andric case Basic: OS << "Basic"; break;
17810b57cec5SDimitry Andric case Special: OS << "Special"; break;
17820b57cec5SDimitry Andric case ICmpZero: OS << "ICmpZero"; break;
17830b57cec5SDimitry Andric case Address:
17840b57cec5SDimitry Andric OS << "Address of ";
17850b57cec5SDimitry Andric if (AccessTy.MemTy->isPointerTy())
17860b57cec5SDimitry Andric OS << "pointer"; // the full pointer type could be really verbose
17870b57cec5SDimitry Andric else {
17880b57cec5SDimitry Andric OS << *AccessTy.MemTy;
17890b57cec5SDimitry Andric }
17900b57cec5SDimitry Andric
17910b57cec5SDimitry Andric OS << " in addrspace(" << AccessTy.AddrSpace << ')';
17920b57cec5SDimitry Andric }
17930b57cec5SDimitry Andric
17940b57cec5SDimitry Andric OS << ", Offsets={";
17950b57cec5SDimitry Andric bool NeedComma = false;
17960b57cec5SDimitry Andric for (const LSRFixup &Fixup : Fixups) {
17970b57cec5SDimitry Andric if (NeedComma) OS << ',';
17980b57cec5SDimitry Andric OS << Fixup.Offset;
17990b57cec5SDimitry Andric NeedComma = true;
18000b57cec5SDimitry Andric }
18010b57cec5SDimitry Andric OS << '}';
18020b57cec5SDimitry Andric
18030b57cec5SDimitry Andric if (AllFixupsOutsideLoop)
18040b57cec5SDimitry Andric OS << ", all-fixups-outside-loop";
18050b57cec5SDimitry Andric
18060b57cec5SDimitry Andric if (WidestFixupType)
18070b57cec5SDimitry Andric OS << ", widest fixup type: " << *WidestFixupType;
18080b57cec5SDimitry Andric }
18090b57cec5SDimitry Andric
18100b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRUse::dump() const {
18110b57cec5SDimitry Andric print(errs()); errs() << '\n';
18120b57cec5SDimitry Andric }
18130b57cec5SDimitry Andric #endif
18140b57cec5SDimitry Andric
18150b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
18160b57cec5SDimitry Andric LSRUse::KindType Kind, MemAccessTy AccessTy,
18170fca6ea1SDimitry Andric GlobalValue *BaseGV, Immediate BaseOffset,
18180b57cec5SDimitry Andric bool HasBaseReg, int64_t Scale,
18190b57cec5SDimitry Andric Instruction *Fixup /* = nullptr */) {
18200b57cec5SDimitry Andric switch (Kind) {
18210fca6ea1SDimitry Andric case LSRUse::Address: {
18220fca6ea1SDimitry Andric int64_t FixedOffset =
18230fca6ea1SDimitry Andric BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
18240fca6ea1SDimitry Andric int64_t ScalableOffset =
18250fca6ea1SDimitry Andric BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
18260fca6ea1SDimitry Andric return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
18270fca6ea1SDimitry Andric HasBaseReg, Scale, AccessTy.AddrSpace,
18280fca6ea1SDimitry Andric Fixup, ScalableOffset);
18290fca6ea1SDimitry Andric }
18300b57cec5SDimitry Andric case LSRUse::ICmpZero:
18310b57cec5SDimitry Andric // There's not even a target hook for querying whether it would be legal to
18320b57cec5SDimitry Andric // fold a GV into an ICmp.
18330b57cec5SDimitry Andric if (BaseGV)
18340b57cec5SDimitry Andric return false;
18350b57cec5SDimitry Andric
18360b57cec5SDimitry Andric // ICmp only has two operands; don't allow more than two non-trivial parts.
18370fca6ea1SDimitry Andric if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
18380b57cec5SDimitry Andric return false;
18390b57cec5SDimitry Andric
18400b57cec5SDimitry Andric // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
18410b57cec5SDimitry Andric // putting the scaled register in the other operand of the icmp.
18420b57cec5SDimitry Andric if (Scale != 0 && Scale != -1)
18430b57cec5SDimitry Andric return false;
18440b57cec5SDimitry Andric
18450b57cec5SDimitry Andric // If we have low-level target information, ask the target if it can fold an
18460b57cec5SDimitry Andric // integer immediate on an icmp.
18470fca6ea1SDimitry Andric if (BaseOffset.isNonZero()) {
18480fca6ea1SDimitry Andric // We don't have an interface to query whether the target supports
18490fca6ea1SDimitry Andric // icmpzero against scalable quantities yet.
18500fca6ea1SDimitry Andric if (BaseOffset.isScalable())
18510fca6ea1SDimitry Andric return false;
18520fca6ea1SDimitry Andric
18530b57cec5SDimitry Andric // We have one of:
18540b57cec5SDimitry Andric // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
18550b57cec5SDimitry Andric // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
18560b57cec5SDimitry Andric // Offs is the ICmp immediate.
18570b57cec5SDimitry Andric if (Scale == 0)
18580b57cec5SDimitry Andric // The cast does the right thing with
18590b57cec5SDimitry Andric // std::numeric_limits<int64_t>::min().
18600fca6ea1SDimitry Andric BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
18610fca6ea1SDimitry Andric return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
18620b57cec5SDimitry Andric }
18630b57cec5SDimitry Andric
18640b57cec5SDimitry Andric // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
18650b57cec5SDimitry Andric return true;
18660b57cec5SDimitry Andric
18670b57cec5SDimitry Andric case LSRUse::Basic:
18680b57cec5SDimitry Andric // Only handle single-register values.
18690fca6ea1SDimitry Andric return !BaseGV && Scale == 0 && BaseOffset.isZero();
18700b57cec5SDimitry Andric
18710b57cec5SDimitry Andric case LSRUse::Special:
18720b57cec5SDimitry Andric // Special case Basic to handle -1 scales.
18730fca6ea1SDimitry Andric return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
18740b57cec5SDimitry Andric }
18750b57cec5SDimitry Andric
18760b57cec5SDimitry Andric llvm_unreachable("Invalid LSRUse Kind!");
18770b57cec5SDimitry Andric }
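
// Illustrative sketch (IR names hypothetical, not part of the pass logic):
// an ICmpZero use whose formula has BaseOffset == 16 and Scale == 0 stands
// for a rewritten exit test of the form
//   %c = icmp eq i64 %base.reg, -16
// so the legality query above reduces to TTI.isLegalICmpImmediate(-16).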

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))
    return false;
  // Check for overflow.
  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > 0))
    return false;
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > 0))
    return false;
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());

  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
                              HasBaseReg, Scale) &&
         isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
                              HasBaseReg, Scale);
}
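
// For example (values hypothetical): with BaseOffset == 100 and fixup
// offsets spanning [MinOffset, MaxOffset] == [-4, 8], this requires both
// the 96 and the 108 displacement to be foldable before the formula is
// considered completely folded across the whole use.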

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 const Formula &F, const Loop &L) {
  // For the purpose of isAMCompletelyFolded, either having a canonical
  // formula or a scale not equal to zero is correct.
  // Problems may arise from non-canonical formulae having a scale == 0.
  // Strictly speaking it would be best to just rely on canonical formulae.
  // However, when we generate the scaled formulae, we first check that the
  // scaling factor is profitable before computing the actual ScaledReg, for
  // compile time's sake.
  assert((F.isCanonical(L) || F.Scale != 0));
  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}

/// Test whether we know how to expand the current formula.
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, GlobalValue *BaseGV,
                       Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
  // We know how to expand completely foldable formulae.
  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              BaseGV, BaseOffset, HasBaseReg, Scale) ||
         // Or formulae that use a base register produced by a sum of base
         // registers.
         (Scale == 1 &&
          isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                               BaseGV, BaseOffset, true, 0));
}

static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
}

static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
                                Immediate Offset) {
  if (Offset.isScalable())
    return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());

  return TTI.isLegalAddImmediate(Offset.getFixedValue());
}

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 const LSRUse &LU, const Formula &F) {
  // Target may want to look at the user instructions.
  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
    for (const LSRFixup &Fixup : LU.Fixups)
      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))
        return false;
    return true;
  }

  return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                              LU.AccessTy, F.BaseGV, F.BaseOffset,
                              F.HasBaseReg, F.Scale);
}

static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
                                            const LSRUse &LU, const Formula &F,
                                            const Loop &L) {
  if (!F.Scale)
    return 0;

  // If the use is not completely folded in that instruction, we will have to
  // pay an extra cost only for scale != 1.
  if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                            LU.AccessTy, F, L))
    return F.Scale != 1;

  switch (LU.Kind) {
  case LSRUse::Address: {
    // Check the scaling factor cost with both the min and max offsets.
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
    } else {
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
    }
    InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
        LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
    InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
        LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);

    assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  }
  case LSRUse::ICmpZero:
  case LSRUse::Basic:
  case LSRUse::Special:
    // The use is completely folded, i.e., everything is folded into the
    // instruction.
    return 0;
  }

  llvm_unreachable("Invalid LSRUse Kind!");
}
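
// For instance (hypothetical target behavior): if [reg + reg*8] addressing
// costs no more than [reg], getScalingFactorCost returns 0 for Scale == 8
// and the solver may use scaled formulae freely; a nonzero return value
// biases the cost model away from scaled addressing for this use.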

static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                             LSRUse::KindType Kind, MemAccessTy AccessTy,
                             GlobalValue *BaseGV, Immediate BaseOffset,
                             bool HasBaseReg) {
  // Fast-path: zero is always foldable.
  if (BaseOffset.isZero() && !BaseGV)
    return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  // Canonicalize a scale of 1 to a base register if the formula doesn't
  // already have a base register.
  if (!HasBaseReg && Scale == 1) {
    Scale = 0;
    HasBaseReg = true;
  }

  // FIXME: Try with + without a scale? Maybe based on TTI?
  // I think basereg + scaledreg + immediateoffset isn't a good 'conservative'
  // default for many architectures, not just AArch64 SVE. More investigation
  // needed later to determine if this should be used more widely than just
  // on scalable types.
  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
      AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)
    Scale = 0;

  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
                              HasBaseReg, Scale);
}

static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                             ScalarEvolution &SE, Immediate MinOffset,
                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
                             bool HasBaseReg) {
  // Fast-path: zero is always foldable.
  if (S->isZero()) return true;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  Immediate BaseOffset = ExtractImmediate(S, SE);
  GlobalValue *BaseGV = ExtractSymbol(S, SE);

  // If there's anything else involved, it's not foldable.
  if (!S->isZero()) return false;

  // Fast-path: zero is always foldable.
  if (BaseOffset.isZero() && !BaseGV)
    return true;

  if (BaseOffset.isScalable())
    return false;

  // Conservatively, create an address with an immediate and a
  // base and a scale.
  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              BaseGV, BaseOffset, HasBaseReg, Scale);
}
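
// Worked example (names hypothetical): for S = (@gv + 16), ExtractImmediate
// peels off 16 and ExtractSymbol peels off @gv, leaving S == 0, so the
// expression is foldable exactly when the target accepts a [@gv + 16]
// addressing mode; if anything remains in S, we conservatively answer no.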

namespace {

/// An individual increment in a Chain of IV increments. Relate an IV user to
/// an expression that computes the IV it uses from the IV used by the previous
/// link in the Chain.
///
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
/// original IVOperand. The head of the chain's IVOperand is only valid during
/// chain collection, before LSR replaces IV users. During chain generation,
/// IncExpr can be used to find the new IVOperand that computes the same
/// expression.
struct IVInc {
  Instruction *UserInst;
  Value *IVOperand;
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}
};

// The list of IV increments in program order. We typically add the head of a
// chain without finding subsequent links.
struct IVChain {
  SmallVector<IVInc, 1> Incs;
  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  using const_iterator = SmallVectorImpl<IVInc>::const_iterator;

  // Return the first increment in the chain.
  const_iterator begin() const {
    assert(!Incs.empty());
    return std::next(Incs.begin());
  }
  const_iterator end() const {
    return Incs.end();
  }

  // Returns true if this chain contains any increments.
  bool hasIncs() const { return Incs.size() >= 2; }

  // Add an IVInc to the end of this chain.
  void add(const IVInc &X) { Incs.push_back(X); }

  // Returns the last UserInst in the chain.
  Instruction *tailUserInst() const { return Incs.back().UserInst; }

  // Returns true if IncExpr can be profitably added to this chain.
  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,
                             ScalarEvolution &);
};
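
// Sketch of a chain (IR names hypothetical): with 4-byte pointer steps,
//   %p1 = getelementptr i8, ptr %p, i64 4
//   %p2 = getelementptr i8, ptr %p1, i64 4
// the head IVInc records the absolute expression for the original operand,
// and each later IVInc records only the relative step {+4} from the previous
// link, so each link can be rematerialized from its predecessor rather than
// from a common base register.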

/// Helper for CollectChains to track multiple IV increment uses. Distinguish
/// between FarUsers that definitely cross IV increments and NearUsers that may
/// be used between IV increments.
struct ChainUsers {
  SmallPtrSet<Instruction *, 4> FarUsers;
  SmallPtrSet<Instruction *, 4> NearUsers;
};

/// This class holds state for the main loop strength reduction logic.
class LSRInstance {
  IVUsers &IU;
  ScalarEvolution &SE;
  DominatorTree &DT;
  LoopInfo &LI;
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  Loop *const L;
  MemorySSAUpdater *MSSAU;
  TTI::AddressingModeKind AMK;
  mutable SCEVExpander Rewriter;
  bool Changed = false;

  /// This is the insert position at which the current loop's induction
  /// variable increment should be placed. In simple loops, this is the latch
  /// block's terminator. But in more complicated cases, this is a position
  /// which will dominate all the in-loop post-increment users.
  Instruction *IVIncInsertPos = nullptr;

  /// Interesting factors between use strides.
  ///
  /// We explicitly use a SetVector which contains a SmallSet, instead of the
  /// default, a SmallDenseSet, because we need to use the full range of
  /// int64_ts, and there's currently no good way of doing that with
  /// SmallDenseSet.
  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  /// The baseline cost of the existing IV solution; the best solution found
  /// by LSR will be dropped if it is not profitable relative to this cost.
  Cost BaselineCost;

  /// Interesting use types, to facilitate truncation reuse.
  SmallSetVector<Type *, 4> Types;

  /// The list of interesting uses.
  mutable SmallVector<LSRUse, 16> Uses;

  /// Track which uses use which register candidates.
  RegUseTracker RegUses;

  // Limit the number of chains to avoid quadratic behavior. We don't expect to
  // have more than a few IV increment chains in a loop. Missing a Chain falls
  // back to normal LSR behavior for those uses.
  static const unsigned MaxChains = 8;

  /// IV users can form a chain of IV increments.
  SmallVector<IVChain, MaxChains> IVChainVec;

  /// IV users that belong to profitable IVChains.
  SmallPtrSet<Use *, MaxChains> IVIncSet;

  /// Induction variables that were generated and inserted by the
  /// SCEVExpander.
  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse);
  void OptimizeLoopTermCond();

  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  // Support for sharing of LSRUses between LSRFixups.
  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
  UseMapTy UseMap;

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);

  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<Immediate> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    Cost &SolutionCost,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

  BasicBlock::iterator
  HoistInsertPosition(BasicBlock::iterator IP,
                      const SmallVectorImpl<Instruction *> &Inputs) const;
  BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
                                                     const LSRFixup &LF,
                                                     const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                BasicBlock::iterator IP,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     const Formula &F,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

public:
  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI,
              AssumptionCache &AC, TargetLibraryInfo &TLI,
              MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;
  }

  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

/// If the IV is used in an int-to-float cast inside the loop then try to
/// eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
       UI != E; /* empty */) {
    IVUsers::const_iterator CandidateUI = UI;
    ++UI;
    Instruction *ShadowUse = CandidateUI->getUser();
    Type *DestTy = nullptr;
    bool IsSigned = false;

    /* If the shadow use is an int->float cast, then insert a second IV
       to eliminate this cast.

         for (unsigned i = 0; i < n; ++i)
           foo((double)i);

       is transformed into

         double d = 0.0;
         for (unsigned i = 0; i < n; ++i, ++d)
           foo(d);
    */
    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      IsSigned = false;
      DestTy = UCast->getDestTy();
    }
    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      IsSigned = true;
      DestTy = SCast->getDestTy();
    }
    if (!DestTy) continue;

    // If the target does not support DestTy natively then do not apply
    // this transformation.
    if (!TTI.isTypeLegal(DestTy)) continue;

    PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
    if (!PH) continue;
    if (PH->getNumIncomingValues() != 2) continue;

    // If the calculation in integers overflows, the result in the FP type
    // will differ, so we can only do this transformation if we are guaranteed
    // not to deal with overflowing values.
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
    if (!AR) continue;
    if (IsSigned && !AR->hasNoSignedWrap()) continue;
    if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;

    Type *SrcTy = PH->getType();
    int Mantissa = DestTy->getFPMantissaWidth();
    if (Mantissa == -1) continue;
    if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
      continue;

    unsigned Entry, Latch;
    if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
      Entry = 0;
      Latch = 1;
    } else {
      Entry = 1;
      Latch = 0;
    }

    ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                        (double)Init->getSExtValue() :
                                        (double)Init->getZExtValue());

    BinaryOperator *Incr =
        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)
      continue;

    /* Initialize the new IV, double d = 0.0 in the above example. */
    ConstantInt *C = nullptr;
    if (Incr->getOperand(0) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(1));
    else if (Incr->getOperand(1) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(0));
    else
      continue;

    if (!C) continue;

    // Ignore negative constants, as the code below doesn't handle them
    // correctly. TODO: Remove this restriction.
    if (!C->getValue().isStrictlyPositive())
      continue;

    /* Add the new PHINode. */
    PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH->getIterator());
    NewPH->setDebugLoc(PH->getDebugLoc());

    /* Create the new increment, '++d' in the above example. */
    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
    BinaryOperator *NewIncr = BinaryOperator::Create(
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,
        NewPH, CFP, "IV.S.next.", Incr->getIterator());
    NewIncr->setDebugLoc(Incr->getDebugLoc());

    NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
    NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));

    /* Remove the cast operation. */
    ShadowUse->replaceAllUsesWith(NewPH);
    ShadowUse->eraseFromParent();
    Changed = true;
    break;
  }
}

/// If Cond has an operand that is an expression of an IV, set the IV user and
/// stride information and return true, otherwise return false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
  for (IVStrideUse &U : IU)
    if (U.getUser() == Cond) {
      // NOTE: we could handle setcc instructions with multiple uses here, but
      // InstCombine does it as well for simple uses; it's not clear that this
      // occurs often enough in real life to be worth handling.
      CondUse = &U;
      return true;
    }
  return false;
}

/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       p[i] = 0.0;
///     } while (++i < n);
///   }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     p[i] = 0.0;
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting loops of this type,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse) {
  // Check that the loop matches the pattern we're looking for.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  // Check for a max calculation that matches the pattern. There's no check
  // for ICMP_ULE here because the comparison would be with zero, which
  // isn't interesting.
  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  const SCEVNAryExpr *Max = nullptr;
  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
    Max = S;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
    Max = S;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;
    Max = U;
  } else {
    // No match; bail.
    return Cond;
  }

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

  // ScalarEvolution canonicalizes constants to the left. For < and >, look
  // for a comparison with 1. For <= and >=, a comparison with zero.
  if (!MaxLHS ||
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
    return Cond;

  // Check the relevant induction variable for conformance to
  // the pattern.
  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
    // Look for n+1, and grab n.
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
    if (!NewRHS)
      return Cond;
  } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();
  else
    // Max doesn't match expected pattern.
    return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into an SLT or SGE and
  // delete the max calculation.
  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  NewCond->setDebugLoc(Cond->getDebugLoc());
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}

/// Change the loop terminating condition to use the post-inc IV when
/// possible.
void
LSRInstance::OptimizeLoopTermCond() {
  SmallPtrSet<Instruction *, 4> PostIncs;

  // We need a different set of heuristics for rotated and non-rotated loops.
  // If a loop is rotated then the latch is also the backedge, so inserting
  // post-inc expressions just before the latch is ideal. To reduce live ranges
  // it also makes sense to rewrite terminating conditions to use post-inc
  // expressions.
  //
  // If the loop is not rotated then the latch is not a backedge; the latch
  // check is done in the loop head. Adding post-inc expressions before the
  // latch will cause overlapping live ranges of pre-inc and post-inc
  // expressions in the loop body. In this case we do *not* want to use
  // post-inc expressions in the latch check, and we want to insert post-inc
  // expressions before the backedge.
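  //
  // For illustration (IR names hypothetical): a rotated loop's latch test
  //   %c = icmp ne i64 %i, %max
  // can instead test the incremented value,
  //   %i.next = add i64 %i, 1
  //   %c = icmp ne i64 %i.next, %max.plus.1
  // so the pre-inc value %i need not stay live across the backedge.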
  BasicBlock *LatchBlock = L->getLoopLatch();
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (!llvm::is_contained(ExitingBlocks, LatchBlock)) {
    // The backedge doesn't exit the loop; treat this as a head-tested loop.
    IVIncInsertPos = LatchBlock->getTerminator();
    return;
  }

  // Otherwise treat this as a rotated loop.
  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    // Get the terminating condition for the loop if possible. If we
    // can, we want to change it to use a post-incremented version of its
    // induction variable, to allow coalescing the live ranges for the IV into
    // one register value.

    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
    if (!TermBr)
      continue;
    // FIXME: Overly conservative; the termination condition could be an 'or'
    // etc.
    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
      continue;

    // Search IVUsesByStride to find Cond's IVUse if there is one.
    IVStrideUse *CondUse = nullptr;
    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
    if (!FindIVUserForCond(Cond, CondUse))
      continue;

    // If the trip count is computed in terms of a max (due to ScalarEvolution
    // being unable to find a sufficient guard, for example), change the loop
    // comparison to use SLT or ULT instead of NE.
    // One consequence of doing this now is that it disrupts the count-down
    // optimization. That's not always a bad thing though, because in such
    // cases it may still be worthwhile to avoid a max.
    Cond = OptimizeMax(Cond, CondUse);

    // If this exiting block dominates the latch block, it may also use
    // the post-inc value if it won't be shared with other uses.
    // Check for dominance.
    if (!DT.dominates(ExitingBlock, LatchBlock))
      continue;

    // Conservatively avoid trying to use the post-inc value in non-latch
    // exits if there may be pre-inc users in intervening blocks.
    if (LatchBlock != ExitingBlock)
      for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
        // Test if the use is reachable from the exiting block. This dominator
        // query is a conservative approximation of reachability.
        if (&*UI != CondUse &&
            !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
          // Conservatively assume there may be reuse if the quotient of their
          // strides could be a legal scale.
          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;
          if (SE.getTypeSizeInBits(A->getType()) !=
              SE.getTypeSizeInBits(B->getType())) {
            if (SE.getTypeSizeInBits(A->getType()) >
                SE.getTypeSizeInBits(B->getType()))
              B = SE.getSignExtendExpr(B, A->getType());
            else
              A = SE.getSignExtendExpr(A, B->getType());
          }
          if (const SCEVConstant *D =
                  dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
            const ConstantInt *C = D->getValue();
            // Stride of one or negative one can have reuse with non-addresses.
            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            // Avoid weird situations.
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
            // Check for possible scaled-address reuse.
            if (isAddressUse(TTI, UI->getUser(),
                             UI->getOperandValToReplace())) {
              MemAccessTy AccessTy = getAccessType(
                  TTI, UI->getUser(), UI->getOperandValToReplace());
              int64_t Scale = C->getSExtValue();
              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                            /*BaseOffset=*/0,
                                            /*HasBaseReg=*/true, Scale,
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
              Scale = -Scale;
              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                            /*BaseOffset=*/0,
                                            /*HasBaseReg=*/true, Scale,
                                            AccessTy.AddrSpace))
                goto decline_post_inc;
            }
          }
        }
27130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
27140b57cec5SDimitry Andric << *Cond << '\n');
27150b57cec5SDimitry Andric
27160b57cec5SDimitry Andric // It's possible for the setcc instruction to be anywhere in the loop, and
27170b57cec5SDimitry Andric // possible for it to have multiple users. If it is not immediately before
27180b57cec5SDimitry Andric // the exiting block branch, move it.
2719fe6060f1SDimitry Andric if (Cond->getNextNonDebugInstruction() != TermBr) {
27200b57cec5SDimitry Andric if (Cond->hasOneUse()) {
27210b57cec5SDimitry Andric Cond->moveBefore(TermBr);
27220b57cec5SDimitry Andric } else {
27230b57cec5SDimitry Andric       // Clone the terminating condition and insert it before the terminating
27230b57cec5SDimitry Andric       // branch of the exiting block.
27240b57cec5SDimitry Andric ICmpInst *OldCond = Cond;
27250b57cec5SDimitry Andric Cond = cast<ICmpInst>(Cond->clone());
27260b57cec5SDimitry Andric Cond->setName(L->getHeader()->getName() + ".termcond");
2727bdd1243dSDimitry Andric Cond->insertInto(ExitingBlock, TermBr->getIterator());
27280b57cec5SDimitry Andric
27290b57cec5SDimitry Andric // Clone the IVUse, as the old use still exists!
27300b57cec5SDimitry Andric CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
27310b57cec5SDimitry Andric TermBr->replaceUsesOfWith(OldCond, Cond);
27320b57cec5SDimitry Andric }
27330b57cec5SDimitry Andric }
27340b57cec5SDimitry Andric
27350b57cec5SDimitry Andric // If we get to here, we know that we can transform the setcc instruction to
27360b57cec5SDimitry Andric // use the post-incremented version of the IV, allowing us to coalesce the
27370b57cec5SDimitry Andric // live ranges for the IV correctly.
27380b57cec5SDimitry Andric CondUse->transformToPostInc(L);
27390b57cec5SDimitry Andric Changed = true;
27400b57cec5SDimitry Andric
27410b57cec5SDimitry Andric PostIncs.insert(Cond);
27420b57cec5SDimitry Andric decline_post_inc:;
27430b57cec5SDimitry Andric }
27440b57cec5SDimitry Andric
27450b57cec5SDimitry Andric // Determine an insertion point for the loop induction variable increment. It
27460b57cec5SDimitry Andric // must dominate all the post-inc comparisons we just set up, and it must
27470b57cec5SDimitry Andric // dominate the loop latch edge.
27480b57cec5SDimitry Andric IVIncInsertPos = L->getLoopLatch()->getTerminator();
2749bdd1243dSDimitry Andric for (Instruction *Inst : PostIncs)
2750bdd1243dSDimitry Andric IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
27510b57cec5SDimitry Andric }
27520b57cec5SDimitry Andric
27530b57cec5SDimitry Andric /// Determine if the given use can accommodate a fixup at the given offset and
27540b57cec5SDimitry Andric /// other details. If so, update the use and return true.
27550fca6ea1SDimitry Andric bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
27560b57cec5SDimitry Andric bool HasBaseReg, LSRUse::KindType Kind,
27570b57cec5SDimitry Andric MemAccessTy AccessTy) {
27580fca6ea1SDimitry Andric Immediate NewMinOffset = LU.MinOffset;
27590fca6ea1SDimitry Andric Immediate NewMaxOffset = LU.MaxOffset;
27600b57cec5SDimitry Andric MemAccessTy NewAccessTy = AccessTy;
27610b57cec5SDimitry Andric
27620b57cec5SDimitry Andric // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
27630b57cec5SDimitry Andric   // something conservative; however, this can pessimize when, for example, one
27640b57cec5SDimitry Andric   // of the uses has all of its uses outside the loop.
27650b57cec5SDimitry Andric if (LU.Kind != Kind)
27660b57cec5SDimitry Andric return false;
27670b57cec5SDimitry Andric
27680b57cec5SDimitry Andric // Check for a mismatched access type, and fall back conservatively as needed.
27690b57cec5SDimitry Andric // TODO: Be less conservative when the type is similar and can use the same
27700b57cec5SDimitry Andric // addressing modes.
27710b57cec5SDimitry Andric if (Kind == LSRUse::Address) {
27720b57cec5SDimitry Andric if (AccessTy.MemTy != LU.AccessTy.MemTy) {
27730b57cec5SDimitry Andric NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
27740b57cec5SDimitry Andric AccessTy.AddrSpace);
27750b57cec5SDimitry Andric }
27760b57cec5SDimitry Andric }
27770b57cec5SDimitry Andric
27780b57cec5SDimitry Andric // Conservatively assume HasBaseReg is true for now.
27790fca6ea1SDimitry Andric if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
27800b57cec5SDimitry Andric if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
27810b57cec5SDimitry Andric LU.MaxOffset - NewOffset, HasBaseReg))
27820b57cec5SDimitry Andric return false;
27830b57cec5SDimitry Andric NewMinOffset = NewOffset;
27840fca6ea1SDimitry Andric } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
27850b57cec5SDimitry Andric if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
27860b57cec5SDimitry Andric NewOffset - LU.MinOffset, HasBaseReg))
27870b57cec5SDimitry Andric return false;
27880b57cec5SDimitry Andric NewMaxOffset = NewOffset;
27890b57cec5SDimitry Andric }
27900b57cec5SDimitry Andric
27910fca6ea1SDimitry Andric // FIXME: We should be able to handle some level of scalable offset support
27920fca6ea1SDimitry Andric // for 'void', but in order to get basic support up and running this is
27930fca6ea1SDimitry Andric // being left out.
27940fca6ea1SDimitry Andric if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
27950fca6ea1SDimitry Andric (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
27960fca6ea1SDimitry Andric return false;
27970fca6ea1SDimitry Andric
27980b57cec5SDimitry Andric // Update the use.
27990b57cec5SDimitry Andric LU.MinOffset = NewMinOffset;
28000b57cec5SDimitry Andric LU.MaxOffset = NewMaxOffset;
28010b57cec5SDimitry Andric LU.AccessTy = NewAccessTy;
28020b57cec5SDimitry Andric return true;
28030b57cec5SDimitry Andric }
28040b57cec5SDimitry Andric
28050b57cec5SDimitry Andric /// Return an LSRUse index and an offset value for a fixup which needs the given
28060b57cec5SDimitry Andric /// expression, with the given kind and optional access type. Either reuse an
28070b57cec5SDimitry Andric /// existing use or create a new one, as needed.
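/// For example (a sketch): given Expr == {(%base + 16),+,4}, ExtractImmediate
/// peels off Offset == 16 and leaves Expr == {%base,+,4}, so fixups that
/// differ only by a foldable constant can share a single LSRUse.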
28080fca6ea1SDimitry Andric std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
28090b57cec5SDimitry Andric LSRUse::KindType Kind,
28100b57cec5SDimitry Andric MemAccessTy AccessTy) {
28110b57cec5SDimitry Andric const SCEV *Copy = Expr;
28120fca6ea1SDimitry Andric Immediate Offset = ExtractImmediate(Expr, SE);
28130b57cec5SDimitry Andric
28140b57cec5SDimitry Andric   // Basic uses, for example, can't accept any offset.
28150b57cec5SDimitry Andric if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
28160b57cec5SDimitry Andric Offset, /*HasBaseReg=*/ true)) {
28170b57cec5SDimitry Andric Expr = Copy;
28180fca6ea1SDimitry Andric Offset = Immediate::getFixed(0);
28190b57cec5SDimitry Andric }
28200b57cec5SDimitry Andric
28210b57cec5SDimitry Andric std::pair<UseMapTy::iterator, bool> P =
28220b57cec5SDimitry Andric UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
28230b57cec5SDimitry Andric if (!P.second) {
28240b57cec5SDimitry Andric // A use already existed with this base.
28250b57cec5SDimitry Andric size_t LUIdx = P.first->second;
28260b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
28270b57cec5SDimitry Andric if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
28280b57cec5SDimitry Andric // Reuse this use.
28290b57cec5SDimitry Andric return std::make_pair(LUIdx, Offset);
28300b57cec5SDimitry Andric }
28310b57cec5SDimitry Andric
28320b57cec5SDimitry Andric // Create a new use.
28330b57cec5SDimitry Andric size_t LUIdx = Uses.size();
28340b57cec5SDimitry Andric P.first->second = LUIdx;
28350b57cec5SDimitry Andric Uses.push_back(LSRUse(Kind, AccessTy));
28360b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
28370b57cec5SDimitry Andric
28380b57cec5SDimitry Andric LU.MinOffset = Offset;
28390b57cec5SDimitry Andric LU.MaxOffset = Offset;
28400b57cec5SDimitry Andric return std::make_pair(LUIdx, Offset);
28410b57cec5SDimitry Andric }
28420b57cec5SDimitry Andric
28430b57cec5SDimitry Andric /// Delete the given use from the Uses list.
28440b57cec5SDimitry Andric void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
28450b57cec5SDimitry Andric if (&LU != &Uses.back())
28460b57cec5SDimitry Andric std::swap(LU, Uses.back());
28470b57cec5SDimitry Andric Uses.pop_back();
28480b57cec5SDimitry Andric
28490b57cec5SDimitry Andric // Update RegUses.
28500b57cec5SDimitry Andric RegUses.swapAndDropUse(LUIdx, Uses.size());
28510b57cec5SDimitry Andric }
28520b57cec5SDimitry Andric
28530b57cec5SDimitry Andric /// Look for a use distinct from OrigLU which has a formula with the same
28540b57cec5SDimitry Andric /// registers as the given formula.
28550b57cec5SDimitry Andric LSRUse *
28560b57cec5SDimitry Andric LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
28570b57cec5SDimitry Andric const LSRUse &OrigLU) {
28580b57cec5SDimitry Andric // Search all uses for the formula. This could be more clever.
285906c3fb27SDimitry Andric for (LSRUse &LU : Uses) {
28600b57cec5SDimitry Andric // Check whether this use is close enough to OrigLU, to see whether it's
28610b57cec5SDimitry Andric // worthwhile looking through its formulae.
28620b57cec5SDimitry Andric // Ignore ICmpZero uses because they may contain formulae generated by
28630b57cec5SDimitry Andric // GenerateICmpZeroScales, in which case adding fixup offsets may
28640b57cec5SDimitry Andric // be invalid.
28650b57cec5SDimitry Andric if (&LU != &OrigLU &&
28660b57cec5SDimitry Andric LU.Kind != LSRUse::ICmpZero &&
28670b57cec5SDimitry Andric LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
28680b57cec5SDimitry Andric LU.WidestFixupType == OrigLU.WidestFixupType &&
28690b57cec5SDimitry Andric LU.HasFormulaWithSameRegs(OrigF)) {
28700b57cec5SDimitry Andric // Scan through this use's formulae.
28710b57cec5SDimitry Andric for (const Formula &F : LU.Formulae) {
28720b57cec5SDimitry Andric // Check to see if this formula has the same registers and symbols
28730b57cec5SDimitry Andric // as OrigF.
28740b57cec5SDimitry Andric if (F.BaseRegs == OrigF.BaseRegs &&
28750b57cec5SDimitry Andric F.ScaledReg == OrigF.ScaledReg &&
28760b57cec5SDimitry Andric F.BaseGV == OrigF.BaseGV &&
28770b57cec5SDimitry Andric F.Scale == OrigF.Scale &&
28780b57cec5SDimitry Andric F.UnfoldedOffset == OrigF.UnfoldedOffset) {
28790fca6ea1SDimitry Andric if (F.BaseOffset.isZero())
28800b57cec5SDimitry Andric return &LU;
28810b57cec5SDimitry Andric // This is the formula where all the registers and symbols matched;
28820b57cec5SDimitry Andric // there aren't going to be any others. Since we declined it, we
28830b57cec5SDimitry Andric // can skip the rest of the formulae and proceed to the next LSRUse.
28840b57cec5SDimitry Andric break;
28850b57cec5SDimitry Andric }
28860b57cec5SDimitry Andric }
28870b57cec5SDimitry Andric }
28880b57cec5SDimitry Andric }
28890b57cec5SDimitry Andric
28900b57cec5SDimitry Andric // Nothing looked good.
28910b57cec5SDimitry Andric return nullptr;
28920b57cec5SDimitry Andric }
28930b57cec5SDimitry Andric
28940b57cec5SDimitry Andric void LSRInstance::CollectInterestingTypesAndFactors() {
28950b57cec5SDimitry Andric SmallSetVector<const SCEV *, 4> Strides;
28960b57cec5SDimitry Andric
28970b57cec5SDimitry Andric // Collect interesting types and strides.
28980b57cec5SDimitry Andric SmallVector<const SCEV *, 4> Worklist;
28990b57cec5SDimitry Andric for (const IVStrideUse &U : IU) {
29000b57cec5SDimitry Andric const SCEV *Expr = IU.getExpr(U);
290106c3fb27SDimitry Andric if (!Expr)
290206c3fb27SDimitry Andric continue;
29030b57cec5SDimitry Andric
29040b57cec5SDimitry Andric // Collect interesting types.
29050b57cec5SDimitry Andric Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
29060b57cec5SDimitry Andric
29070b57cec5SDimitry Andric // Add strides for mentioned loops.
29080b57cec5SDimitry Andric Worklist.push_back(Expr);
29090b57cec5SDimitry Andric do {
29100b57cec5SDimitry Andric const SCEV *S = Worklist.pop_back_val();
29110b57cec5SDimitry Andric if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
29120b57cec5SDimitry Andric if (AR->getLoop() == L)
29130b57cec5SDimitry Andric Strides.insert(AR->getStepRecurrence(SE));
29140b57cec5SDimitry Andric Worklist.push_back(AR->getStart());
29150b57cec5SDimitry Andric } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2916bdd1243dSDimitry Andric append_range(Worklist, Add->operands());
29170b57cec5SDimitry Andric }
29180b57cec5SDimitry Andric } while (!Worklist.empty());
29190b57cec5SDimitry Andric }
29200b57cec5SDimitry Andric
29210b57cec5SDimitry Andric // Compute interesting factors from the set of interesting strides.
29220b57cec5SDimitry Andric for (SmallSetVector<const SCEV *, 4>::const_iterator
29230b57cec5SDimitry Andric I = Strides.begin(), E = Strides.end(); I != E; ++I)
29240b57cec5SDimitry Andric for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
29250b57cec5SDimitry Andric std::next(I); NewStrideIter != E; ++NewStrideIter) {
29260b57cec5SDimitry Andric const SCEV *OldStride = *I;
29270b57cec5SDimitry Andric const SCEV *NewStride = *NewStrideIter;
29280b57cec5SDimitry Andric
29290b57cec5SDimitry Andric if (SE.getTypeSizeInBits(OldStride->getType()) !=
29300b57cec5SDimitry Andric SE.getTypeSizeInBits(NewStride->getType())) {
29310b57cec5SDimitry Andric if (SE.getTypeSizeInBits(OldStride->getType()) >
29320b57cec5SDimitry Andric SE.getTypeSizeInBits(NewStride->getType()))
29330b57cec5SDimitry Andric NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
29340b57cec5SDimitry Andric else
29350b57cec5SDimitry Andric OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
29360b57cec5SDimitry Andric }
29370b57cec5SDimitry Andric if (const SCEVConstant *Factor =
29380b57cec5SDimitry Andric dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
29390b57cec5SDimitry Andric SE, true))) {
294006c3fb27SDimitry Andric if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
29410b57cec5SDimitry Andric Factors.insert(Factor->getAPInt().getSExtValue());
29420b57cec5SDimitry Andric } else if (const SCEVConstant *Factor =
29430b57cec5SDimitry Andric dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
29440b57cec5SDimitry Andric NewStride,
29450b57cec5SDimitry Andric SE, true))) {
294606c3fb27SDimitry Andric if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
29470b57cec5SDimitry Andric Factors.insert(Factor->getAPInt().getSExtValue());
29480b57cec5SDimitry Andric }
29490b57cec5SDimitry Andric }
29500b57cec5SDimitry Andric
29510b57cec5SDimitry Andric // If all uses use the same type, don't bother looking for truncation-based
29520b57cec5SDimitry Andric // reuse.
29530b57cec5SDimitry Andric if (Types.size() == 1)
29540b57cec5SDimitry Andric Types.clear();
29550b57cec5SDimitry Andric
29560b57cec5SDimitry Andric LLVM_DEBUG(print_factors_and_types(dbgs()));
29570b57cec5SDimitry Andric }
29580b57cec5SDimitry Andric
29590b57cec5SDimitry Andric /// Helper for CollectChains that finds an IV operand (computed by an AddRec in
29600b57cec5SDimitry Andric /// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
29610b57cec5SDimitry Andric /// IVStrideUses, we could partially skip this.
29620b57cec5SDimitry Andric static User::op_iterator
29630b57cec5SDimitry Andric findIVOperand(User::op_iterator OI, User::op_iterator OE,
29640b57cec5SDimitry Andric Loop *L, ScalarEvolution &SE) {
29650b57cec5SDimitry Andric for(; OI != OE; ++OI) {
29660b57cec5SDimitry Andric if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
29670b57cec5SDimitry Andric if (!SE.isSCEVable(Oper->getType()))
29680b57cec5SDimitry Andric continue;
29690b57cec5SDimitry Andric
29700b57cec5SDimitry Andric if (const SCEVAddRecExpr *AR =
29710b57cec5SDimitry Andric dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
29720b57cec5SDimitry Andric if (AR->getLoop() == L)
29730b57cec5SDimitry Andric break;
29740b57cec5SDimitry Andric }
29750b57cec5SDimitry Andric }
29760b57cec5SDimitry Andric }
29770b57cec5SDimitry Andric return OI;
29780b57cec5SDimitry Andric }
29790b57cec5SDimitry Andric
29800b57cec5SDimitry Andric /// IVChain logic must consistently look through base TruncInst operands, so
29810b57cec5SDimitry Andric /// wrap that logic in a convenient helper.
29820b57cec5SDimitry Andric static Value *getWideOperand(Value *Oper) {
29830b57cec5SDimitry Andric if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
29840b57cec5SDimitry Andric return Trunc->getOperand(0);
29850b57cec5SDimitry Andric return Oper;
29860b57cec5SDimitry Andric }
29870b57cec5SDimitry Andric
29880b57cec5SDimitry Andric /// Return an approximation of this SCEV expression's "base", or NULL for any
29890b57cec5SDimitry Andric /// constant. Returning the expression itself is conservative. Returning a
29900b57cec5SDimitry Andric /// deeper subexpression is more precise and valid as long as it isn't less
29910b57cec5SDimitry Andric /// complex than another subexpression. For expressions involving multiple
29920b57cec5SDimitry Andric /// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
29930b57cec5SDimitry Andric /// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
29940b57cec5SDimitry Andric /// IVInc==b-a.
29950b57cec5SDimitry Andric ///
29960b57cec5SDimitry Andric /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
29970b57cec5SDimitry Andric /// SCEVUnknown, we simply return the rightmost SCEV operand.
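/// For example (an illustrative sketch): for (4 * %n) + %obj, the rightmost
/// operand that is not behind a multiply is %obj, so %obj is the base; for
/// {%obj,+,%step}, the base of the start value, %obj, is returned.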
29980b57cec5SDimitry Andric static const SCEV *getExprBase(const SCEV *S) {
29990b57cec5SDimitry Andric switch (S->getSCEVType()) {
300006c3fb27SDimitry Andric default: // including scUnknown.
30010b57cec5SDimitry Andric return S;
30020b57cec5SDimitry Andric case scConstant:
300306c3fb27SDimitry Andric case scVScale:
30040b57cec5SDimitry Andric return nullptr;
30050b57cec5SDimitry Andric case scTruncate:
30060b57cec5SDimitry Andric return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
30070b57cec5SDimitry Andric case scZeroExtend:
30080b57cec5SDimitry Andric return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
30090b57cec5SDimitry Andric case scSignExtend:
30100b57cec5SDimitry Andric return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
30110b57cec5SDimitry Andric case scAddExpr: {
30120b57cec5SDimitry Andric // Skip over scaled operands (scMulExpr) to follow add operands as long as
30130b57cec5SDimitry Andric // there's nothing more complex.
30140b57cec5SDimitry Andric // FIXME: not sure if we want to recognize negation.
30150b57cec5SDimitry Andric const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
3016349cc55cSDimitry Andric for (const SCEV *SubExpr : reverse(Add->operands())) {
30170b57cec5SDimitry Andric if (SubExpr->getSCEVType() == scAddExpr)
30180b57cec5SDimitry Andric return getExprBase(SubExpr);
30190b57cec5SDimitry Andric
30200b57cec5SDimitry Andric if (SubExpr->getSCEVType() != scMulExpr)
30210b57cec5SDimitry Andric return SubExpr;
30220b57cec5SDimitry Andric }
30230b57cec5SDimitry Andric return S; // all operands are scaled, be conservative.
30240b57cec5SDimitry Andric }
30250b57cec5SDimitry Andric case scAddRecExpr:
30260b57cec5SDimitry Andric return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
30270b57cec5SDimitry Andric }
3028e8d8bef9SDimitry Andric llvm_unreachable("Unknown SCEV kind!");
30290b57cec5SDimitry Andric }
30300b57cec5SDimitry Andric
30310b57cec5SDimitry Andric /// Return true if the chain increment is profitable to expand into a loop
30320b57cec5SDimitry Andric /// invariant value, which may require its own register. A profitable chain
30330b57cec5SDimitry Andric /// increment will be an offset relative to the same base. We allow such offsets
30340b57cec5SDimitry Andric /// to be used as the chain increment as long as they are not obviously
30350b57cec5SDimitry Andric /// expensive to expand using real instructions.
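/// For example (a sketch): if the chain head computes &A[i] and OperExpr is
/// &A[i] + 16, the increment is a constant offset from the same base and is
/// cheap to keep; an increment such as (%m * %n), which needs real
/// instructions to materialize, would likely be rejected as a high cost
/// expansion.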
30360b57cec5SDimitry Andric bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
30370b57cec5SDimitry Andric const SCEV *IncExpr,
30380b57cec5SDimitry Andric ScalarEvolution &SE) {
30390b57cec5SDimitry Andric // Aggressively form chains when -stress-ivchain.
30400b57cec5SDimitry Andric if (StressIVChain)
30410b57cec5SDimitry Andric return true;
30420b57cec5SDimitry Andric
30430b57cec5SDimitry Andric // Do not replace a constant offset from IV head with a nonconstant IV
30440b57cec5SDimitry Andric // increment.
30450b57cec5SDimitry Andric if (!isa<SCEVConstant>(IncExpr)) {
30460b57cec5SDimitry Andric const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
30470b57cec5SDimitry Andric if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
30480b57cec5SDimitry Andric return false;
30490b57cec5SDimitry Andric }
30500b57cec5SDimitry Andric
30510b57cec5SDimitry Andric SmallPtrSet<const SCEV*, 8> Processed;
30520b57cec5SDimitry Andric return !isHighCostExpansion(IncExpr, Processed, SE);
30530b57cec5SDimitry Andric }
30540b57cec5SDimitry Andric
30550b57cec5SDimitry Andric /// Return true if the number of registers needed for the chain is estimated to
30560b57cec5SDimitry Andric /// be less than the number required for the individual IV users. First prohibit
30570b57cec5SDimitry Andric /// any IV users that keep the IV live across increments (the Users set should
30580b57cec5SDimitry Andric /// be empty). Next count the number and type of increments in the chain.
30590b57cec5SDimitry Andric ///
30600b57cec5SDimitry Andric /// Chaining IVs can lead to considerable code bloat if ISEL doesn't
30610b57cec5SDimitry Andric /// effectively use postinc addressing modes. Only consider it profitable if the
30620b57cec5SDimitry Andric /// increments can be computed in fewer registers when chained.
30630b57cec5SDimitry Andric ///
30640b57cec5SDimitry Andric /// TODO: Consider IVInc free if it's already used in another chain.
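/// For example (a hypothetical chain): the cost starts at 1 for the chain's
/// own register; ending in the header phi with a matching increment
/// subtracts 1, as does having more than one constant increment, while each
/// distinct variable increment adds 1. A phi-terminated chain with two
/// constant increments thus nets 1 - 1 - 1 = -1 < 0 and is kept.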
30655ffd83dbSDimitry Andric static bool isProfitableChain(IVChain &Chain,
30665ffd83dbSDimitry Andric SmallPtrSetImpl<Instruction *> &Users,
30675ffd83dbSDimitry Andric ScalarEvolution &SE,
30685ffd83dbSDimitry Andric const TargetTransformInfo &TTI) {
30690b57cec5SDimitry Andric if (StressIVChain)
30700b57cec5SDimitry Andric return true;
30710b57cec5SDimitry Andric
30720b57cec5SDimitry Andric if (!Chain.hasIncs())
30730b57cec5SDimitry Andric return false;
30740b57cec5SDimitry Andric
30750b57cec5SDimitry Andric if (!Users.empty()) {
30760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
30770b57cec5SDimitry Andric for (Instruction *Inst
30780b57cec5SDimitry Andric : Users) { dbgs() << " " << *Inst << "\n"; });
30790b57cec5SDimitry Andric return false;
30800b57cec5SDimitry Andric }
30810b57cec5SDimitry Andric assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
30820b57cec5SDimitry Andric
30830b57cec5SDimitry Andric   // The chain itself may require a register, so initialize cost to 1.
30840b57cec5SDimitry Andric int cost = 1;
30850b57cec5SDimitry Andric
30860b57cec5SDimitry Andric // A complete chain likely eliminates the need for keeping the original IV in
30870b57cec5SDimitry Andric // a register. LSR does not currently know how to form a complete chain unless
30880b57cec5SDimitry Andric // the header phi already exists.
30890b57cec5SDimitry Andric if (isa<PHINode>(Chain.tailUserInst())
30900b57cec5SDimitry Andric && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
30910b57cec5SDimitry Andric --cost;
30920b57cec5SDimitry Andric }
30930b57cec5SDimitry Andric const SCEV *LastIncExpr = nullptr;
30940b57cec5SDimitry Andric unsigned NumConstIncrements = 0;
30950b57cec5SDimitry Andric unsigned NumVarIncrements = 0;
30960b57cec5SDimitry Andric unsigned NumReusedIncrements = 0;
30975ffd83dbSDimitry Andric
30985ffd83dbSDimitry Andric if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
30995ffd83dbSDimitry Andric return true;
31005ffd83dbSDimitry Andric
31010b57cec5SDimitry Andric for (const IVInc &Inc : Chain) {
31025ffd83dbSDimitry Andric if (TTI.isProfitableLSRChainElement(Inc.UserInst))
31035ffd83dbSDimitry Andric return true;
31040b57cec5SDimitry Andric if (Inc.IncExpr->isZero())
31050b57cec5SDimitry Andric continue;
31060b57cec5SDimitry Andric
31070b57cec5SDimitry Andric // Incrementing by zero or some constant is neutral. We assume constants can
31080b57cec5SDimitry Andric // be folded into an addressing mode or an add's immediate operand.
31090b57cec5SDimitry Andric if (isa<SCEVConstant>(Inc.IncExpr)) {
31100b57cec5SDimitry Andric ++NumConstIncrements;
31110b57cec5SDimitry Andric continue;
31120b57cec5SDimitry Andric }
31130b57cec5SDimitry Andric
31140b57cec5SDimitry Andric if (Inc.IncExpr == LastIncExpr)
31150b57cec5SDimitry Andric ++NumReusedIncrements;
31160b57cec5SDimitry Andric else
31170b57cec5SDimitry Andric ++NumVarIncrements;
31180b57cec5SDimitry Andric
31190b57cec5SDimitry Andric LastIncExpr = Inc.IncExpr;
31200b57cec5SDimitry Andric }
31210b57cec5SDimitry Andric // An IV chain with a single increment is handled by LSR's postinc
31220b57cec5SDimitry Andric // uses. However, a chain with multiple increments requires keeping the IV's
31230b57cec5SDimitry Andric // value live longer than it needs to be if chained.
31240b57cec5SDimitry Andric if (NumConstIncrements > 1)
31250b57cec5SDimitry Andric --cost;
31260b57cec5SDimitry Andric
31270b57cec5SDimitry Andric // Materializing increment expressions in the preheader that didn't exist in
31280b57cec5SDimitry Andric // the original code may cost a register. For example, sign-extended array
31290b57cec5SDimitry Andric // indices can produce ridiculous increments like this:
31300b57cec5SDimitry Andric // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
31310b57cec5SDimitry Andric cost += NumVarIncrements;
31320b57cec5SDimitry Andric
31330b57cec5SDimitry Andric // Reusing variable increments likely saves a register to hold the multiple of
31340b57cec5SDimitry Andric // the stride.
31350b57cec5SDimitry Andric cost -= NumReusedIncrements;
31360b57cec5SDimitry Andric
31370b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
31380b57cec5SDimitry Andric << "\n");
31390b57cec5SDimitry Andric
31400b57cec5SDimitry Andric return cost < 0;
31410b57cec5SDimitry Andric }
31420b57cec5SDimitry Andric
31430b57cec5SDimitry Andric /// Add this IV user to an existing chain or make it the head of a new chain.
31440b57cec5SDimitry Andric void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
31450b57cec5SDimitry Andric SmallVectorImpl<ChainUsers> &ChainUsersVec) {
31460b57cec5SDimitry Andric   // When IVs are used at varying widths, they are generally converted
31470b57cec5SDimitry Andric // to a wider type with some uses remaining narrow under a (free) trunc.
31480b57cec5SDimitry Andric Value *const NextIV = getWideOperand(IVOper);
31490b57cec5SDimitry Andric const SCEV *const OperExpr = SE.getSCEV(NextIV);
31500b57cec5SDimitry Andric const SCEV *const OperExprBase = getExprBase(OperExpr);
31510b57cec5SDimitry Andric
31520b57cec5SDimitry Andric   // Visit all existing chains. Check if this IV operand can be computed as a
31530b57cec5SDimitry Andric // profitable loop invariant increment from the last link in the Chain.
31540b57cec5SDimitry Andric unsigned ChainIdx = 0, NChains = IVChainVec.size();
31550b57cec5SDimitry Andric const SCEV *LastIncExpr = nullptr;
31560b57cec5SDimitry Andric for (; ChainIdx < NChains; ++ChainIdx) {
31570b57cec5SDimitry Andric IVChain &Chain = IVChainVec[ChainIdx];
31580b57cec5SDimitry Andric
31590b57cec5SDimitry Andric // Prune the solution space aggressively by checking that both IV operands
31600b57cec5SDimitry Andric // are expressions that operate on the same unscaled SCEVUnknown. This
31610b57cec5SDimitry Andric // "base" will be canceled by the subsequent getMinusSCEV call. Checking
31620b57cec5SDimitry Andric // first avoids creating extra SCEV expressions.
31630b57cec5SDimitry Andric if (!StressIVChain && Chain.ExprBase != OperExprBase)
31640b57cec5SDimitry Andric continue;
31650b57cec5SDimitry Andric
31660b57cec5SDimitry Andric Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
31675f757f3fSDimitry Andric if (PrevIV->getType() != NextIV->getType())
31680b57cec5SDimitry Andric continue;
31690b57cec5SDimitry Andric
31700b57cec5SDimitry Andric // A phi node terminates a chain.
31710b57cec5SDimitry Andric if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
31720b57cec5SDimitry Andric continue;
31730b57cec5SDimitry Andric
31740b57cec5SDimitry Andric // The increment must be loop-invariant so it can be kept in a register.
31750b57cec5SDimitry Andric const SCEV *PrevExpr = SE.getSCEV(PrevIV);
31760b57cec5SDimitry Andric const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
3177fe6060f1SDimitry Andric if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))
31780b57cec5SDimitry Andric continue;
31790b57cec5SDimitry Andric
31800b57cec5SDimitry Andric if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
31810b57cec5SDimitry Andric LastIncExpr = IncExpr;
31820b57cec5SDimitry Andric break;
31830b57cec5SDimitry Andric }
31840b57cec5SDimitry Andric }
31850b57cec5SDimitry Andric // If we haven't found a chain, create a new one, unless we hit the max. Don't
31860b57cec5SDimitry Andric // bother for phi nodes, because they must be last in the chain.
31870b57cec5SDimitry Andric if (ChainIdx == NChains) {
31880b57cec5SDimitry Andric if (isa<PHINode>(UserInst))
31890b57cec5SDimitry Andric return;
31900b57cec5SDimitry Andric if (NChains >= MaxChains && !StressIVChain) {
31910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
31920b57cec5SDimitry Andric return;
31930b57cec5SDimitry Andric }
31940b57cec5SDimitry Andric LastIncExpr = OperExpr;
31950b57cec5SDimitry Andric // IVUsers may have skipped over sign/zero extensions. We don't currently
31960b57cec5SDimitry Andric // attempt to form chains involving extensions unless they can be hoisted
31970b57cec5SDimitry Andric // into this loop's AddRec.
31980b57cec5SDimitry Andric if (!isa<SCEVAddRecExpr>(LastIncExpr))
31990b57cec5SDimitry Andric return;
32000b57cec5SDimitry Andric ++NChains;
32010b57cec5SDimitry Andric IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
32020b57cec5SDimitry Andric OperExprBase));
32030b57cec5SDimitry Andric ChainUsersVec.resize(NChains);
32040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
32050b57cec5SDimitry Andric << ") IV=" << *LastIncExpr << "\n");
32060b57cec5SDimitry Andric } else {
32070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
32080b57cec5SDimitry Andric << ") IV+" << *LastIncExpr << "\n");
32090b57cec5SDimitry Andric // Add this IV user to the end of the chain.
32100b57cec5SDimitry Andric IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
32110b57cec5SDimitry Andric }
32120b57cec5SDimitry Andric IVChain &Chain = IVChainVec[ChainIdx];
32130b57cec5SDimitry Andric
32140b57cec5SDimitry Andric SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
32150b57cec5SDimitry Andric // This chain's NearUsers become FarUsers.
32160b57cec5SDimitry Andric if (!LastIncExpr->isZero()) {
32170b57cec5SDimitry Andric ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
32180b57cec5SDimitry Andric NearUsers.end());
32190b57cec5SDimitry Andric NearUsers.clear();
32200b57cec5SDimitry Andric }
32210b57cec5SDimitry Andric
32220b57cec5SDimitry Andric // All other uses of IVOperand become near uses of the chain.
32230b57cec5SDimitry Andric // We currently ignore intermediate values within SCEV expressions, assuming
32240b57cec5SDimitry Andric   // they will eventually be used by the current chain, or can be computed
32250b57cec5SDimitry Andric   // from one of the chain increments. To be more precise, we could
32260b57cec5SDimitry Andric   // transitively follow each user and only add leaf IV users to the set.
32270b57cec5SDimitry Andric for (User *U : IVOper->users()) {
32280b57cec5SDimitry Andric Instruction *OtherUse = dyn_cast<Instruction>(U);
32290b57cec5SDimitry Andric if (!OtherUse)
32300b57cec5SDimitry Andric continue;
32310b57cec5SDimitry Andric // Uses in the chain will no longer be uses if the chain is formed.
32320b57cec5SDimitry Andric // Include the head of the chain in this iteration (not Chain.begin()).
32330b57cec5SDimitry Andric IVChain::const_iterator IncIter = Chain.Incs.begin();
32340b57cec5SDimitry Andric IVChain::const_iterator IncEnd = Chain.Incs.end();
32350b57cec5SDimitry Andric for( ; IncIter != IncEnd; ++IncIter) {
32360b57cec5SDimitry Andric if (IncIter->UserInst == OtherUse)
32370b57cec5SDimitry Andric break;
32380b57cec5SDimitry Andric }
32390b57cec5SDimitry Andric if (IncIter != IncEnd)
32400b57cec5SDimitry Andric continue;
32410b57cec5SDimitry Andric
32420b57cec5SDimitry Andric if (SE.isSCEVable(OtherUse->getType())
32430b57cec5SDimitry Andric && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
32440b57cec5SDimitry Andric && IU.isIVUserOrOperand(OtherUse)) {
32450b57cec5SDimitry Andric continue;
32460b57cec5SDimitry Andric }
32470b57cec5SDimitry Andric NearUsers.insert(OtherUse);
32480b57cec5SDimitry Andric }
32490b57cec5SDimitry Andric
32500b57cec5SDimitry Andric // Since this user is part of the chain, it's no longer considered a use
32510b57cec5SDimitry Andric // of the chain.
32520b57cec5SDimitry Andric ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
32530b57cec5SDimitry Andric }
32540b57cec5SDimitry Andric
32550b57cec5SDimitry Andric /// Populate the vector of Chains.
32560b57cec5SDimitry Andric ///
32570b57cec5SDimitry Andric /// This decreases ILP at the architecture level. Targets with ample registers,
32580b57cec5SDimitry Andric /// multiple memory ports, and no register renaming probably don't want
32590b57cec5SDimitry Andric /// this. However, such targets should probably disable LSR altogether.
32600b57cec5SDimitry Andric ///
32610b57cec5SDimitry Andric /// The job of LSR is to make a reasonable choice of induction variables across
32620b57cec5SDimitry Andric /// the loop. Subsequent passes can easily "unchain" computation exposing more
32630b57cec5SDimitry Andric /// ILP *within the loop* if the target wants it.
32640b57cec5SDimitry Andric ///
32650b57cec5SDimitry Andric /// Finding the best IV chain is potentially a scheduling problem. Since LSR
32660b57cec5SDimitry Andric /// will not reorder memory operations, it will recognize this as a chain, but
32670b57cec5SDimitry Andric /// will generate redundant IV increments. Ideally this would be corrected later
32680b57cec5SDimitry Andric /// by a smart scheduler:
32690b57cec5SDimitry Andric /// = A[i]
32700b57cec5SDimitry Andric /// = A[i+x]
32710b57cec5SDimitry Andric /// A[i] =
32720b57cec5SDimitry Andric /// A[i+x] =
32730b57cec5SDimitry Andric ///
32740b57cec5SDimitry Andric /// TODO: Walk the entire domtree within this loop, not just the path to the
32750b57cec5SDimitry Andric /// loop latch. This will discover chains on side paths, but requires
32760b57cec5SDimitry Andric /// maintaining multiple copies of the Chains state.
32770b57cec5SDimitry Andric void LSRInstance::CollectChains() {
32780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
32790b57cec5SDimitry Andric SmallVector<ChainUsers, 8> ChainUsersVec;
32800b57cec5SDimitry Andric
32810b57cec5SDimitry Andric SmallVector<BasicBlock *,8> LatchPath;
32820b57cec5SDimitry Andric BasicBlock *LoopHeader = L->getHeader();
32830b57cec5SDimitry Andric for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
32840b57cec5SDimitry Andric Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
32850b57cec5SDimitry Andric LatchPath.push_back(Rung->getBlock());
32860b57cec5SDimitry Andric }
32870b57cec5SDimitry Andric LatchPath.push_back(LoopHeader);
32880b57cec5SDimitry Andric
32890b57cec5SDimitry Andric // Walk the instruction stream from the loop header to the loop latch.
32900b57cec5SDimitry Andric for (BasicBlock *BB : reverse(LatchPath)) {
32910b57cec5SDimitry Andric for (Instruction &I : *BB) {
32920b57cec5SDimitry Andric // Skip instructions that weren't seen by IVUsers analysis.
32930b57cec5SDimitry Andric if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
32940b57cec5SDimitry Andric continue;
32950b57cec5SDimitry Andric
32960b57cec5SDimitry Andric // Ignore users that are part of a SCEV expression. This way we only
32970b57cec5SDimitry Andric // consider leaf IV Users. This effectively rediscovers a portion of
32980b57cec5SDimitry Andric // IVUsers analysis but in program order this time.
32990b57cec5SDimitry Andric if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
33000b57cec5SDimitry Andric continue;
33010b57cec5SDimitry Andric
33020b57cec5SDimitry Andric // Remove this instruction from any NearUsers set it may be in.
33030b57cec5SDimitry Andric for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
33040b57cec5SDimitry Andric ChainIdx < NChains; ++ChainIdx) {
33050b57cec5SDimitry Andric ChainUsersVec[ChainIdx].NearUsers.erase(&I);
33060b57cec5SDimitry Andric }
33070b57cec5SDimitry Andric // Search for operands that can be chained.
33080b57cec5SDimitry Andric SmallPtrSet<Instruction*, 4> UniqueOperands;
33090b57cec5SDimitry Andric User::op_iterator IVOpEnd = I.op_end();
33100b57cec5SDimitry Andric User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
33110b57cec5SDimitry Andric while (IVOpIter != IVOpEnd) {
33120b57cec5SDimitry Andric Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
33130b57cec5SDimitry Andric if (UniqueOperands.insert(IVOpInst).second)
33140b57cec5SDimitry Andric ChainInstruction(&I, IVOpInst, ChainUsersVec);
33150b57cec5SDimitry Andric IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
33160b57cec5SDimitry Andric }
33170b57cec5SDimitry Andric } // Continue walking down the instructions.
33180b57cec5SDimitry Andric } // Continue walking down the domtree.
33190b57cec5SDimitry Andric // Visit phi backedges to determine if the chain can generate the IV postinc.
33200b57cec5SDimitry Andric for (PHINode &PN : L->getHeader()->phis()) {
33210b57cec5SDimitry Andric if (!SE.isSCEVable(PN.getType()))
33220b57cec5SDimitry Andric continue;
33230b57cec5SDimitry Andric
33240b57cec5SDimitry Andric Instruction *IncV =
33250b57cec5SDimitry Andric dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
33260b57cec5SDimitry Andric if (IncV)
33270b57cec5SDimitry Andric ChainInstruction(&PN, IncV, ChainUsersVec);
33280b57cec5SDimitry Andric }
33290b57cec5SDimitry Andric // Remove any unprofitable chains.
33300b57cec5SDimitry Andric unsigned ChainIdx = 0;
33310b57cec5SDimitry Andric for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
33320b57cec5SDimitry Andric UsersIdx < NChains; ++UsersIdx) {
33330b57cec5SDimitry Andric if (!isProfitableChain(IVChainVec[UsersIdx],
33345ffd83dbSDimitry Andric ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
33350b57cec5SDimitry Andric continue;
33360b57cec5SDimitry Andric     // Preserve the chain at UsersIdx.
33370b57cec5SDimitry Andric if (ChainIdx != UsersIdx)
33380b57cec5SDimitry Andric IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
33390b57cec5SDimitry Andric FinalizeChain(IVChainVec[ChainIdx]);
33400b57cec5SDimitry Andric ++ChainIdx;
33410b57cec5SDimitry Andric }
33420b57cec5SDimitry Andric IVChainVec.resize(ChainIdx);
33430b57cec5SDimitry Andric }
33440b57cec5SDimitry Andric
33450b57cec5SDimitry Andric void LSRInstance::FinalizeChain(IVChain &Chain) {
33460b57cec5SDimitry Andric assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
33470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
33480b57cec5SDimitry Andric
33490b57cec5SDimitry Andric for (const IVInc &Inc : Chain) {
33500b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
33510b57cec5SDimitry Andric auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
33520b57cec5SDimitry Andric assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
33530b57cec5SDimitry Andric IVIncSet.insert(UseI);
33540b57cec5SDimitry Andric }
33550b57cec5SDimitry Andric }
33560b57cec5SDimitry Andric
33570b57cec5SDimitry Andric /// Return true if the IVInc can be folded into an addressing mode.
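/// For example (an illustrative sketch): a constant increment of 16 folds
/// when the target supports a [reg + 16] addressing mode for this access; a
/// scalable increment is only recognized in the (C * vscale) form, e.g.
/// (4 * vscale), and folds when the equivalent scalable offset is legal.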
33580b57cec5SDimitry Andric static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
33590b57cec5SDimitry Andric Value *Operand, const TargetTransformInfo &TTI) {
33600b57cec5SDimitry Andric const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
33610fca6ea1SDimitry Andric Immediate IncOffset = Immediate::getZero();
33620fca6ea1SDimitry Andric if (IncConst) {
33630fca6ea1SDimitry Andric     if (IncConst->getAPInt().getSignificantBits() > 64)
33640b57cec5SDimitry Andric return false;
33650fca6ea1SDimitry Andric IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
33660fca6ea1SDimitry Andric } else {
33670fca6ea1SDimitry Andric // Look for mul(vscale, constant), to detect a scalable offset.
33680fca6ea1SDimitry Andric auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
33690fca6ea1SDimitry Andric if (!IncVScale || IncVScale->getNumOperands() != 2 ||
33700fca6ea1SDimitry Andric !isa<SCEVVScale>(IncVScale->getOperand(1)))
33710fca6ea1SDimitry Andric return false;
33720fca6ea1SDimitry Andric auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
33730fca6ea1SDimitry Andric if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
33740fca6ea1SDimitry Andric return false;
33750fca6ea1SDimitry Andric IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
33760fca6ea1SDimitry Andric }
33770b57cec5SDimitry Andric
33780fca6ea1SDimitry Andric if (!isAddressUse(TTI, UserInst, Operand))
33790b57cec5SDimitry Andric return false;
33800b57cec5SDimitry Andric
33810b57cec5SDimitry Andric MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
33820b57cec5SDimitry Andric if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
33830b57cec5SDimitry Andric IncOffset, /*HasBaseReg=*/false))
33840b57cec5SDimitry Andric return false;
33850b57cec5SDimitry Andric
33860b57cec5SDimitry Andric return true;
33870b57cec5SDimitry Andric }
33880b57cec5SDimitry Andric
33890b57cec5SDimitry Andric /// Generate an add or subtract for each IVInc in a chain to materialize the IV
33900b57cec5SDimitry Andric /// user's operand from the previous IV user's operand.
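/// For example (a sketch of the emitted pattern, with hypothetical names):
/// for a chain stepping by 16 whose increments cannot be folded into the
/// users' addressing modes, this emits roughly
///   %iv.chain  = add i64 %iv, 16       ; feeds the second user
///   %iv.chain2 = add i64 %iv.chain, 16 ; feeds the third user
/// rather than rematerializing each user's operand from the original IV.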
3391fcaf7f86SDimitry Andric void LSRInstance::GenerateIVChain(const IVChain &Chain,
33920b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
33930b57cec5SDimitry Andric // Find the new IVOperand for the head of the chain. It may have been replaced
33940b57cec5SDimitry Andric // by LSR.
33950b57cec5SDimitry Andric const IVInc &Head = Chain.Incs[0];
33960b57cec5SDimitry Andric User::op_iterator IVOpEnd = Head.UserInst->op_end();
33970b57cec5SDimitry Andric // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
33980b57cec5SDimitry Andric User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
33990b57cec5SDimitry Andric IVOpEnd, L, SE);
34000b57cec5SDimitry Andric Value *IVSrc = nullptr;
34010b57cec5SDimitry Andric while (IVOpIter != IVOpEnd) {
34020b57cec5SDimitry Andric IVSrc = getWideOperand(*IVOpIter);
34030b57cec5SDimitry Andric
34040b57cec5SDimitry Andric // If this operand computes the expression that the chain needs, we may use
34050b57cec5SDimitry Andric // it. (Check this after setting IVSrc which is used below.)
34060b57cec5SDimitry Andric //
34070b57cec5SDimitry Andric // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
34080b57cec5SDimitry Andric // narrow for the chain, so we can no longer use it. We do allow using a
34090b57cec5SDimitry Andric // wider phi, assuming the LSR checked for free truncation. In that case we
34100b57cec5SDimitry Andric // should already have a truncate on this operand such that
34110b57cec5SDimitry Andric // getSCEV(IVSrc) == IncExpr.
34120b57cec5SDimitry Andric if (SE.getSCEV(*IVOpIter) == Head.IncExpr
34130b57cec5SDimitry Andric || SE.getSCEV(IVSrc) == Head.IncExpr) {
34140b57cec5SDimitry Andric break;
34150b57cec5SDimitry Andric }
34160b57cec5SDimitry Andric IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
34170b57cec5SDimitry Andric }
34180b57cec5SDimitry Andric if (IVOpIter == IVOpEnd) {
34190b57cec5SDimitry Andric // Gracefully give up on this chain.
34200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
34210b57cec5SDimitry Andric return;
34220b57cec5SDimitry Andric }
34238bcb0991SDimitry Andric assert(IVSrc && "Failed to find IV chain source");
34240b57cec5SDimitry Andric
34250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
34260b57cec5SDimitry Andric Type *IVTy = IVSrc->getType();
34270b57cec5SDimitry Andric Type *IntTy = SE.getEffectiveSCEVType(IVTy);
34280b57cec5SDimitry Andric const SCEV *LeftOverExpr = nullptr;
34290fca6ea1SDimitry Andric const SCEV *Accum = SE.getZero(IntTy);
34300fca6ea1SDimitry Andric SmallVector<std::pair<const SCEV *, Value *>> Bases;
34310fca6ea1SDimitry Andric Bases.emplace_back(Accum, IVSrc);
34320fca6ea1SDimitry Andric
34330b57cec5SDimitry Andric for (const IVInc &Inc : Chain) {
34340b57cec5SDimitry Andric Instruction *InsertPt = Inc.UserInst;
34350b57cec5SDimitry Andric if (isa<PHINode>(InsertPt))
34360b57cec5SDimitry Andric InsertPt = L->getLoopLatch()->getTerminator();
34370b57cec5SDimitry Andric
34380b57cec5SDimitry Andric // IVOper will replace the current IV User's operand. IVSrc is the IV
34390b57cec5SDimitry Andric // value currently held in a register.
34400b57cec5SDimitry Andric Value *IVOper = IVSrc;
34410b57cec5SDimitry Andric if (!Inc.IncExpr->isZero()) {
34420b57cec5SDimitry Andric // IncExpr was the result of subtraction of two narrow values, so must
34430b57cec5SDimitry Andric // be signed.
34440b57cec5SDimitry Andric const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
34450fca6ea1SDimitry Andric Accum = SE.getAddExpr(Accum, IncExpr);
34460b57cec5SDimitry Andric LeftOverExpr = LeftOverExpr ?
34470b57cec5SDimitry Andric SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
34480b57cec5SDimitry Andric }
34490fca6ea1SDimitry Andric
34500fca6ea1SDimitry Andric // Look through each base to see if any can produce a nice addressing mode.
34510fca6ea1SDimitry Andric bool FoundBase = false;
34520fca6ea1SDimitry Andric for (auto [MapScev, MapIVOper] : reverse(Bases)) {
34530fca6ea1SDimitry Andric const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
34540fca6ea1SDimitry Andric if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) {
34550fca6ea1SDimitry Andric if (!Remainder->isZero()) {
34560fca6ea1SDimitry Andric Rewriter.clearPostInc();
34570fca6ea1SDimitry Andric Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
34580fca6ea1SDimitry Andric const SCEV *IVOperExpr =
34590fca6ea1SDimitry Andric SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV));
34600fca6ea1SDimitry Andric IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
34610fca6ea1SDimitry Andric } else {
34620fca6ea1SDimitry Andric IVOper = MapIVOper;
34630fca6ea1SDimitry Andric }
34640fca6ea1SDimitry Andric
34650fca6ea1SDimitry Andric FoundBase = true;
34660fca6ea1SDimitry Andric break;
34670fca6ea1SDimitry Andric }
34680fca6ea1SDimitry Andric }
34690fca6ea1SDimitry Andric if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
34700b57cec5SDimitry Andric // Expand the IV increment.
34710b57cec5SDimitry Andric Rewriter.clearPostInc();
34720b57cec5SDimitry Andric Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
34730b57cec5SDimitry Andric const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
34740b57cec5SDimitry Andric SE.getUnknown(IncV));
34750b57cec5SDimitry Andric IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
34760b57cec5SDimitry Andric
34770b57cec5SDimitry Andric // If an IV increment can't be folded, use it as the next IV value.
34780b57cec5SDimitry Andric if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
34790b57cec5SDimitry Andric assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
34800fca6ea1SDimitry Andric Bases.emplace_back(Accum, IVOper);
34810b57cec5SDimitry Andric IVSrc = IVOper;
34820b57cec5SDimitry Andric LeftOverExpr = nullptr;
34830b57cec5SDimitry Andric }
34840b57cec5SDimitry Andric }
34850b57cec5SDimitry Andric Type *OperTy = Inc.IVOperand->getType();
34860b57cec5SDimitry Andric if (IVTy != OperTy) {
34870b57cec5SDimitry Andric assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
34880b57cec5SDimitry Andric "cannot extend a chained IV");
34890b57cec5SDimitry Andric IRBuilder<> Builder(InsertPt);
34900b57cec5SDimitry Andric IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
34910b57cec5SDimitry Andric }
34920b57cec5SDimitry Andric Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
34935ffd83dbSDimitry Andric if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
34945ffd83dbSDimitry Andric DeadInsts.emplace_back(OperandIsInstr);
34950b57cec5SDimitry Andric }
34960b57cec5SDimitry Andric // If LSR created a new, wider phi, we may also replace its postinc. We only
34970b57cec5SDimitry Andric // do this if we also found a wide value for the head of the chain.
34980b57cec5SDimitry Andric if (isa<PHINode>(Chain.tailUserInst())) {
34990b57cec5SDimitry Andric for (PHINode &Phi : L->getHeader()->phis()) {
35005f757f3fSDimitry Andric if (Phi.getType() != IVSrc->getType())
35010b57cec5SDimitry Andric continue;
35020b57cec5SDimitry Andric Instruction *PostIncV = dyn_cast<Instruction>(
35030b57cec5SDimitry Andric Phi.getIncomingValueForBlock(L->getLoopLatch()));
35040b57cec5SDimitry Andric if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
35050b57cec5SDimitry Andric continue;
35060b57cec5SDimitry Andric Value *IVOper = IVSrc;
35070b57cec5SDimitry Andric Type *PostIncTy = PostIncV->getType();
35080b57cec5SDimitry Andric if (IVTy != PostIncTy) {
35090b57cec5SDimitry Andric assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
35100b57cec5SDimitry Andric IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
35110b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
35120b57cec5SDimitry Andric IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
35130b57cec5SDimitry Andric }
35140b57cec5SDimitry Andric Phi.replaceUsesOfWith(PostIncV, IVOper);
35150b57cec5SDimitry Andric DeadInsts.emplace_back(PostIncV);
35160b57cec5SDimitry Andric }
35170b57cec5SDimitry Andric }
35180b57cec5SDimitry Andric }
35190b57cec5SDimitry Andric
35200b57cec5SDimitry Andric void LSRInstance::CollectFixupsAndInitialFormulae() {
35210b57cec5SDimitry Andric BranchInst *ExitBranch = nullptr;
35225ffd83dbSDimitry Andric bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
35230b57cec5SDimitry Andric
3524bdd1243dSDimitry Andric // For calculating baseline cost
3525bdd1243dSDimitry Andric SmallPtrSet<const SCEV *, 16> Regs;
3526bdd1243dSDimitry Andric DenseSet<const SCEV *> VisitedRegs;
3527bdd1243dSDimitry Andric DenseSet<size_t> VisitedLSRUse;
3528bdd1243dSDimitry Andric
35290b57cec5SDimitry Andric for (const IVStrideUse &U : IU) {
35300b57cec5SDimitry Andric Instruction *UserInst = U.getUser();
35310b57cec5SDimitry Andric // Skip IV users that are part of profitable IV Chains.
35320b57cec5SDimitry Andric User::op_iterator UseI =
35330b57cec5SDimitry Andric find(UserInst->operands(), U.getOperandValToReplace());
35340b57cec5SDimitry Andric assert(UseI != UserInst->op_end() && "cannot find IV operand");
35350b57cec5SDimitry Andric if (IVIncSet.count(UseI)) {
35360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
35370b57cec5SDimitry Andric continue;
35380b57cec5SDimitry Andric }
35390b57cec5SDimitry Andric
35400b57cec5SDimitry Andric LSRUse::KindType Kind = LSRUse::Basic;
35410b57cec5SDimitry Andric MemAccessTy AccessTy;
35420b57cec5SDimitry Andric if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
35430b57cec5SDimitry Andric Kind = LSRUse::Address;
35440b57cec5SDimitry Andric AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
35450b57cec5SDimitry Andric }
35460b57cec5SDimitry Andric
35470b57cec5SDimitry Andric const SCEV *S = IU.getExpr(U);
354806c3fb27SDimitry Andric if (!S)
354906c3fb27SDimitry Andric continue;
35500b57cec5SDimitry Andric PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
35510b57cec5SDimitry Andric
35520b57cec5SDimitry Andric // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
35530b57cec5SDimitry Andric // (N - i == 0), and this allows (N - i) to be the expression that we work
35540b57cec5SDimitry Andric // with rather than just N or i, so we can consider the register
35550b57cec5SDimitry Andric // requirements for both N and i at the same time. Limiting this code to
35560b57cec5SDimitry Andric // equality icmps is not a problem because all interesting loops use
35570b57cec5SDimitry Andric // equality icmps, thanks to IndVarSimplify.
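    // For example (a sketch): %c = icmp eq i64 %i, %n becomes an ICmpZero
    // use whose expression is (%n - %i), letting LSR weigh the register
    // requirements of %n and %i together instead of treating %n as opaque.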
35588bcb0991SDimitry Andric if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
35590b57cec5SDimitry Andric // If CI can be saved in some target, like replaced inside hardware loop
35600b57cec5SDimitry Andric // in PowerPC, no need to generate initial formulae for it.
35610b57cec5SDimitry Andric if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
35620b57cec5SDimitry Andric continue;
35638bcb0991SDimitry Andric if (CI->isEquality()) {
35640b57cec5SDimitry Andric // Swap the operands if needed to put the OperandValToReplace on the
35650b57cec5SDimitry Andric // left, for consistency.
35660b57cec5SDimitry Andric Value *NV = CI->getOperand(1);
35670b57cec5SDimitry Andric if (NV == U.getOperandValToReplace()) {
35680b57cec5SDimitry Andric CI->setOperand(1, CI->getOperand(0));
35690b57cec5SDimitry Andric CI->setOperand(0, NV);
35700b57cec5SDimitry Andric NV = CI->getOperand(1);
35710b57cec5SDimitry Andric Changed = true;
35720b57cec5SDimitry Andric }
35730b57cec5SDimitry Andric
35740b57cec5SDimitry Andric // x == y --> x - y == 0
35750b57cec5SDimitry Andric const SCEV *N = SE.getSCEV(NV);
3576fcaf7f86SDimitry Andric if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
3577fe6060f1SDimitry Andric (!NV->getType()->isPointerTy() ||
3578fe6060f1SDimitry Andric SE.getPointerBase(N) == SE.getPointerBase(S))) {
35790b57cec5SDimitry Andric // S is normalized, so normalize N before folding it into S
35800b57cec5SDimitry Andric // to keep the result normalized.
35810b57cec5SDimitry Andric N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
358206c3fb27SDimitry Andric if (!N)
358306c3fb27SDimitry Andric continue;
35840b57cec5SDimitry Andric Kind = LSRUse::ICmpZero;
35850b57cec5SDimitry Andric S = SE.getMinusSCEV(N, S);
3586fcaf7f86SDimitry Andric } else if (L->isLoopInvariant(NV) &&
3587fcaf7f86SDimitry Andric (!isa<Instruction>(NV) ||
3588fcaf7f86SDimitry Andric DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
3589fcaf7f86SDimitry Andric !NV->getType()->isPointerTy()) {
3590fcaf7f86SDimitry Andric // If we can't generally expand the expression (e.g. it contains
3591fcaf7f86SDimitry Andric // a divide), but it is already at a loop invariant point before the
3592fcaf7f86SDimitry Andric // loop, wrap it in an unknown (to prevent the expander from trying
3593fcaf7f86SDimitry Andric           // to re-expand in a potentially unsafe way). The restriction to
3594fcaf7f86SDimitry Andric // integer types is required because the unknown hides the base, and
3595fcaf7f86SDimitry Andric // SCEV can't compute the difference of two unknown pointers.
3596fcaf7f86SDimitry Andric N = SE.getUnknown(NV);
3597fcaf7f86SDimitry Andric N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
359806c3fb27SDimitry Andric if (!N)
359906c3fb27SDimitry Andric continue;
3600fcaf7f86SDimitry Andric Kind = LSRUse::ICmpZero;
3601fcaf7f86SDimitry Andric S = SE.getMinusSCEV(N, S);
3602fcaf7f86SDimitry Andric assert(!isa<SCEVCouldNotCompute>(S));
36030b57cec5SDimitry Andric }
36040b57cec5SDimitry Andric
36050b57cec5SDimitry Andric // -1 and the negations of all interesting strides (except the negation
36060b57cec5SDimitry Andric // of -1) are now also interesting.
36070b57cec5SDimitry Andric for (size_t i = 0, e = Factors.size(); i != e; ++i)
36080b57cec5SDimitry Andric if (Factors[i] != -1)
36090b57cec5SDimitry Andric Factors.insert(-(uint64_t)Factors[i]);
36100b57cec5SDimitry Andric Factors.insert(-1);
36110b57cec5SDimitry Andric }
36128bcb0991SDimitry Andric }
36130b57cec5SDimitry Andric
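    // For example, a latch comparison
    //   %c = icmp eq i64 %i.next, %n
    // is recorded as an ICmpZero use of the expression (%n - %i.next), so the
    // register requirements of %n and the IV are considered together.
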
    // Get or create an LSRUse.
    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    Immediate Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    // Record the fixup.
    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LF.Offset = Offset;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    // Create a Formula from S for calculating the baseline cost.
    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      Formula F;
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);
    }

    if (!LU.WidestFixupType ||
        SE.getTypeSizeInBits(LU.WidestFixupType) <
        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }

  LLVM_DEBUG(print_fixups(dbgs()));
}

/// Insert a formula for the given expression into the given use, separating
/// out loop-variant portions from loop-invariant and loop-computable portions.
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
                                       size_t LUIdx) {
  // Mark uses whose expressions cannot be expanded.
  if (!Rewriter.isSafeToExpand(S))
    LU.RigidFormula = true;

  Formula F;
  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

/// Insert a simple single-register formula for the given expression into the
/// given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  if (F.ScaledReg)
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);
}

/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  // Do not insert formula that we will not be able to expand.
  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))
    return false;

  CountRegisters(F, LUIdx);
  return true;
}

/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
  SmallPtrSet<const SCEV *, 32> Visited;

  // Don't collect outside uses if we are favoring postinc - the instructions
  // in the loop are more important than the ones outside of it.
  if (AMK == TTI::AMK_PostIndexed)
    return;

  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    // Don't process the same SCEV twice.
    if (!Visited.insert(S).second)
      continue;

    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
      append_range(Worklist, N->operands());
    else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
      Worklist.push_back(C->getOperand());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        // Look for instructions defined outside the loop.
        if (L->contains(Inst)) continue;
      } else if (isa<Constant>(V))
        // Constants can be re-materialized.
        continue;
      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Don't bother if the instruction is an EHPad.
        if (UserInst->isEHPad())
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Don't bother if the instruction is in a BB which ends in an EHPad.
        if (UseBB->getTerminator()->isEHPad())
          continue;

        // Ignore cases in which the currently-examined value could come from
        // a basic block terminated with an EHPad. This checks all incoming
        // blocks of the phi node since it is possible that the same incoming
        // value comes from multiple basic blocks, only some of which may end
        // in an EHPad. If any of them do, a subsequent rewrite attempt by this
        // pass would try to insert instructions into an EHPad, hitting an
        // assertion.
        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          llvm::Value *ExpectedValue = U;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
                break;
              }
            }
          }
          if (HasIncompatibleEHPTerminatedBlock) {
            continue;
          }
        }

        // Don't bother rewriting PHIs in catchswitch blocks.
        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType())) {
          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
          // If the user is a no-op, look through to its uses.
          if (!isa<SCEVUnknown>(UserS))
            continue;
          if (UserS == US) {
            Worklist.push_back(
              SE.getUnknown(const_cast<Instruction *>(UserInst)));
            continue;
          }
        }
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
            continue;
        }

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        Immediate Offset = P.second;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LF.Offset = Offset;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
            SE.getTypeSizeInBits(LU.WidestFixupType) <
            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
        break;
      }
    }
  }
}
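
// For example, if a value %n is defined before the loop and is also used by a
// store after the loop, %n is pinned in a register regardless of what LSR
// does, so formulae that reuse a register holding %n become relatively
// cheaper for the solver.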

/// Split S into subexpressions which can be pulled out into separate
/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                                   SmallVectorImpl<const SCEV *> &Ops,
                                   const Loop *L,
                                   ScalarEvolution &SE,
                                   unsigned Depth = 0) {
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3)
    return S;

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // Break out add operands.
    for (const SCEV *S : Add->operands()) {
      const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
    }
    return nullptr;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // Split a non-zero base out of an addrec.
    if (AR->getStart()->isZero() || !AR->isAffine())
      return S;

    const SCEV *Remainder = CollectSubexprs(AR->getStart(),
                                            C, Ops, L, SE, Depth+1);
    // Split the non-zero AddRec unless it is part of a nested recurrence that
    // does not pertain to this loop.
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = nullptr;
    }
    if (Remainder != AR->getStart()) {
      if (!Remainder)
        Remainder = SE.getConstant(AR->getType(), 0);
      return SE.getAddRecExpr(Remainder,
                              AR->getStepRecurrence(SE),
                              AR->getLoop(),
                              //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                              SCEV::FlagAnyWrap);
    }
  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    // Break (C * (a + b + c)) into C*a + C*b + C*c.
    if (Mul->getNumOperands() != 2)
      return S;
    if (const SCEVConstant *Op0 =
            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
          CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(SE.getMulExpr(C, Remainder));
      return nullptr;
    }
  }
  return S;
}
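
// For example, given {(%a + (2 * %b)),+,4}<%L>, CollectSubexprs collects %a
// and (2 * %b) as separate subexpressions and returns the remainder
// {0,+,4}<%L>, so each piece can be considered as its own register.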

/// Return true if the SCEV represents a value that may end up as a
/// post-increment operation.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
                              LSRUse &LU, const SCEV *S, const Loop *L,
                              ScalarEvolution &SE) {
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())
    return false;
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR)
    return false;
  const SCEV *LoopStep = AR->getStepRecurrence(SE);
  if (!isa<SCEVConstant>(LoopStep))
    return false;
  // Check if a post-indexed load/store can be used.
  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
    const SCEV *LoopStart = AR->getStart();
    if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
      return true;
  }
  return false;
}
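
// For example, a target with post-indexed loads (AArch64, say) can implement
// an access at {%base,+,8}<%L> as "ldr x0, [x1], #8" when %base is
// loop-invariant and non-constant, folding the IV increment into the access.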

/// Helper function for LSRInstance::GenerateReassociations.
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
                                             bool IsScaledReg) {
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  // Don't generate reassociations for the base register of a value that
  // may generate a post-increment operator. The reason is that the
  // reassociations cause extra base+register formula to be created,
  // and possibly chosen, but the post-increment is more efficient.
  if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
    return;
  SmallVector<const SCEV *, 8> AddOps;
  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
  if (Remainder)
    AddOps.push_back(Remainder);

  if (AddOps.size() == 1)
    return;

  for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
                                                     JE = AddOps.end();
       J != JE; ++J) {
    // Loop-variant "unknown" values are uninteresting; we won't be able to
    // do anything meaningful with them.
    if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
      continue;

    // Don't pull a constant into a register if the constant could be folded
    // into an immediate field.
    if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                         LU.AccessTy, *J, Base.getNumRegs() > 1))
      continue;

    // Collect all operands except *J.
    SmallVector<const SCEV *, 8> InnerAddOps(
        ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
    InnerAddOps.append(std::next(J),
                       ((const SmallVector<const SCEV *, 8> &)AddOps).end());

    // Don't leave just a constant behind in a register if the constant could
    // be folded into an immediate field.
    if (InnerAddOps.size() == 1 &&
        isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                         LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
      continue;

    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
    if (InnerSum->isZero())
      continue;
    Formula F = Base;

    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
      continue;

    // Add the remaining pieces of the add back into the new formula.
    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
    if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                InnerSumSC->getValue()->getZExtValue())) {
      F.UnfoldedOffset =
          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                              InnerSumSC->getValue()->getZExtValue());
      if (IsScaledReg)
        F.ScaledReg = nullptr;
      else
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
    else
      F.BaseRegs[Idx] = InnerSum;

    // Add J as its own register, or an unfolded immediate.
    const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
    if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                SC->getValue()->getZExtValue()))
      F.UnfoldedOffset =
          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                              SC->getValue()->getZExtValue());
    else
      F.BaseRegs.push_back(*J);
    // We may have changed the number of registers in the base regs; adjust
    // the formula accordingly.
    F.canonicalize(*L);

    if (InsertFormula(LU, LUIdx, F))
      // If that formula hadn't been seen before, recurse to find more like
      // it. Also bump Depth by Log16(AddOps.size()) (i.e.
      // Log2_32(AddOps.size()) >> 2), because Depth alone is not enough to
      // bound compile time: every time AddOps.size() grows past 16^x, x is
      // added to the depth.
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
                             Depth + 1 + (Log2_32(AddOps.size()) >> 2));
  }
}

/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base, unsigned Depth) {
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3)
    return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
                               /* Idx */ -1, /* IsScaledReg */ true);
}
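
// For example, a formula with the single base register
// (%a + %b + {0,+,4}<%L>) can be reassociated into reg(%a) plus
// reg(%b + {0,+,4}<%L>), exposing %a as a register that other uses' formulae
// may share.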

/// Generate a formula consisting of all of the loop-dominating registers added
/// into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
                                       Formula Base) {
  // This method is only interesting on a plurality of registers.
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
          (Base.UnfoldedOffset.isNonZero()) <=
      1)
    return;

  // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
  // processing the formula.
  Base.unscale();
  SmallVector<const SCEV *, 4> Ops;
  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
    if (SE.properlyDominates(BaseReg, L->getHeader()) &&
        !SE.hasComputableLoopEvolution(BaseReg, L)) {
      if (!CombinedIntegerType)
        CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
      Ops.push_back(BaseReg);
    } else
      NewBase.BaseRegs.push_back(BaseReg);
  }

  // If no register is relevant, we're done.
  if (Ops.size() == 0)
    return;

  // Utility function for generating the required variants of the combined
  // registers.
  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;

    // TODO: If Sum is zero, it probably means ScalarEvolution missed an
    // opportunity to fold something. For now, just ignore such cases
    // rather than proceed with zero in a register.
    if (Sum->isZero())
      return;

    F.BaseRegs.push_back(Sum);
    F.canonicalize(*L);
    (void)InsertFormula(LU, LUIdx, F);
  };

  // If we collected at least two registers, generate a formula combining them.
  if (Ops.size() > 1) {
    SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
    GenerateFormula(SE.getAddExpr(OpsCopy));
  }

  // If we have an unfolded offset, generate a formula combining it with the
  // registers collected.
  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
    Ops.push_back(SE.getConstant(CombinedIntegerType,
                                 NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
    GenerateFormula(SE.getAddExpr(Ops));
  }
}
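
// For example, reg(%a) + reg(%b) + reg({0,+,4}<%L>), where %a and %b are
// loop-invariant and dominate the loop, combines into
// reg(%a + %b) + reg({0,+,4}<%L>), trading one add outside the loop for one
// fewer register inside it.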

/// Helper function for LSRInstance::GenerateSymbolicOffsets.
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
                                              bool IsScaledReg) {
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  GlobalValue *GV = ExtractSymbol(G, SE);
  if (G->isZero() || !GV)
    return;
  Formula F = Base;
  F.BaseGV = GV;
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    return;
  if (IsScaledReg)
    F.ScaledReg = G;
  else
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);
}

/// Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  // We can't add a symbolic offset if the address already contains one.
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
                                /* IsScaledReg */ true);
}
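
// For example, a base register (@global + %x) splits into BaseGV = @global
// with base register %x, letting targets that support symbol-relative
// addressing fold @global into the address operand.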

/// Helper function for LSRInstance::GenerateConstantOffsets.
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {

  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    Formula F = Base;
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
      return;
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      // Add the offset to the base register.
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
      const SCEV *NewG = SE.getAddExpr(NewOffset, G);
      // If it cancelled out, drop the base register, otherwise update it.
      if (NewG->isZero()) {
        if (IsScaledReg) {
          F.Scale = 0;
          F.ScaledReg = nullptr;
        } else
          F.deleteBaseReg(F.BaseRegs[Idx]);
        F.canonicalize(*L);
      } else if (IsScaledReg)
        F.ScaledReg = NewG;
      else
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);
    }
  };

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  // With constant offsets and constant steps, we can generate pre-inc
  // accesses by having the offset equal the step. So, for access #0 with a
  // step of 8, we generate a G - 8 base which would require the first access
  // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
  // for itself and hopefully becomes the base for other accesses. This means
  // that a single pre-indexed access can be generated to become the new base
  // pointer for each iteration of the loop, resulting in no extra add/sub
  // instructions for pointer updating.
  if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
    if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
      if (auto *StepRec =
              dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
        const APInt &StepInt = StepRec->getAPInt();
        int64_t Step = StepInt.isNegative() ?
          StepInt.getSExtValue() : StepInt.getZExtValue();

        for (Immediate Offset : Worklist) {
          if (Offset.isFixed()) {
            Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
            GenerateOffset(G, Offset);
          }
        }
      }
    }
  }
  for (Immediate Offset : Worklist)
    GenerateOffset(G, Offset);

  Immediate Imm = ExtractImmediate(G, SE);
  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
    return;
  Formula F = Base;
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    return;
  if (IsScaledReg) {
    F.ScaledReg = G;
  } else {
    F.BaseRegs[Idx] = G;
    // We may generate a non-canonical Formula if G is a recurrence register
    // for the current loop while F.ScaledReg is not.
    F.canonicalize(*L);
  }
  (void)InsertFormula(LU, LUIdx, F);
}

/// Generate reuse formulae using constant offsets.
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  // TODO: For now, just add the min and max offset, because it usually isn't
  // worthwhile looking at everything in between.
  SmallVector<Immediate, 2> Worklist;
  Worklist.push_back(LU.MinOffset);
  if (LU.MaxOffset != LU.MinOffset)
    Worklist.push_back(LU.MaxOffset);

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
                                /* IsScaledReg */ true);
}
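
// For example, with LU.MinOffset == 8, a base register {0,+,8}<%L> produces a
// sibling formula built on {8,+,8}<%L> whose BaseOffset is reduced by 8;
// fixups at that offset can then share the shifted register.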

/// For ICmpZero, check to see if we can scale up the comparison. For example,
/// x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
                                         Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;
  if (SE.getTypeSizeInBits(IntTy) > 64) return;

  // Don't do this if there is more than one offset.
  if (LU.MinOffset != LU.MaxOffset) return;

  // Check if the transformation is valid. It is illegal to multiply a
  // pointer.
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    return;
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
      return;
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    // Check that Factor can be represented by IntTy.
    if (!ConstantInt::isValueValidForType(IntTy, Factor))
      continue;
    // Check that the multiplication doesn't overflow.
    if (Base.BaseOffset.isMin() && Factor == -1)
      continue;
    // Not supporting scalable immediates.
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
      continue;
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset.getFixedValue()))
      continue;

    // Check that multiplying with the use offset doesn't overflow.
    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
      continue;
    Offset = Offset.mulUnsigned(Factor);
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, Offset.getFixedValue()))
      continue;

    Formula F = Base;
    F.BaseOffset = NewBaseOffset;

    // Check that this scale is legal.
    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
      continue;

    // Compensate for the use having MinOffset built into it.
    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    // Check that multiplying with each base register doesn't overflow.
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // Check that multiplying with the unfolded offset doesn't overflow.
    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
        continue;
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
        continue;
      // If the offset will be truncated, check that it is in bounds.
      if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
                                       IntTy, F.UnfoldedOffset.getFixedValue()))
        continue;
    }

    // If we make it here and it's legal, add it.
    (void)InsertFormula(LU, LUIdx, F);
  next:;
  }
}
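
// For example, with 2 among the interesting strides, the ICmpZero use
// (reg(%x) + 4 == 0) can be rescaled to (reg(2 * %x) + 8 == 0), which is
// profitable when 2 * %x is already live for some other use.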

/// Generate stride factor reuse formulae by making use of scaled-offset
/// address modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;

  // If this Formula already has a scaled register, we can't add another one.
  // Try to unscale the formula to generate a better scale.
  if (Base.Scale != 0 && !Base.unscale())
    return;

  assert(Base.Scale == 0 && "unscale did not do its job!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                    Base)) {
      // As a special case, handle out-of-loop Basic users specially.
      // TODO: Reconsider this special case.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For an ICmpZero, negating a solitary base register won't lead to
    // new solutions.
    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)
      continue;
    // For each addrec base reg, if its loop is the current loop, apply the
    // scale.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (FactorS->isZero())
          continue;
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
          if (!Quotient->isZero()) {
            // TODO: This could be optimized to avoid all the copying.
            Formula F = Base;
            F.ScaledReg = Quotient;
            F.deleteBaseReg(F.BaseRegs[i]);
            // The canonical representation of 1*reg is reg, which is already
            // in Base. In that case, do not try to insert the formula, it
            // will be rejected anyway.
            if (F.Scale == 1 && (F.BaseRegs.empty() ||
                                 (AR->getLoop() != L &&
                                  LU.AllFixupsOutsideLoop)))
              continue;
            // If AllFixupsOutsideLoop is true and F.Scale is 1, we may
            // generate a non-canonical Formula whose ScaledReg's loop is
            // not L.
            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
              F.canonicalize(*L);
            (void)InsertFormula(LU, LUIdx, F);
          }
      }
    }
  }
}
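
// For example, with 4 among the interesting strides, a base register
// {0,+,4}<%L> can be rewritten as the scaled register 4 * {0,+,1}<%L>,
// matching a scaled-index addressing mode where the target has one.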

/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
/// perform the extension/truncate and normalize again, as the normalized form
/// can result in folds that are not valid in the post-inc use contexts. The
/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
static const SCEV *
getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
                                   const SCEV *Expr, Type *ToTy,
                                   ScalarEvolution &SE) {
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE);
    const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);
    const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE);
    if (!New || (Result && New != Result))
      return nullptr;
    Result = New;
  }

  assert(Result && "failed to create expression");
  return Result;
}
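
// For example, to extend the normalized {0,+,1}<%L> for a post-inc use, the
// extension is applied to the denormalized {1,+,1}<%L> and the result is
// re-normalized, since extension and normalization do not commute in general.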
441406c3fb27SDimitry Andric
44150b57cec5SDimitry Andric /// Generate reuse formulae from different IV types.
GenerateTruncates(LSRUse & LU,unsigned LUIdx,Formula Base)44160b57cec5SDimitry Andric void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
44170b57cec5SDimitry Andric // Don't bother truncating symbolic values.
44180b57cec5SDimitry Andric if (Base.BaseGV) return;
44190b57cec5SDimitry Andric
44200b57cec5SDimitry Andric // Determine the integer type for the base formula.
44210b57cec5SDimitry Andric Type *DstTy = Base.getType();
44220b57cec5SDimitry Andric if (!DstTy) return;
4423fe6060f1SDimitry Andric if (DstTy->isPointerTy())
4424fe6060f1SDimitry Andric return;
44250b57cec5SDimitry Andric
4426349cc55cSDimitry Andric // It is invalid to extend a pointer type so exit early if ScaledReg or
4427349cc55cSDimitry Andric // any of the BaseRegs are pointers.
4428349cc55cSDimitry Andric if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4429349cc55cSDimitry Andric return;
4430349cc55cSDimitry Andric if (any_of(Base.BaseRegs,
4431349cc55cSDimitry Andric [](const SCEV *S) { return S->getType()->isPointerTy(); }))
4432349cc55cSDimitry Andric return;
4433349cc55cSDimitry Andric
443406c3fb27SDimitry Andric SmallVector<PostIncLoopSet> Loops;
443506c3fb27SDimitry Andric for (auto &LF : LU.Fixups)
443606c3fb27SDimitry Andric Loops.push_back(LF.PostIncLoops);
443706c3fb27SDimitry Andric
44380b57cec5SDimitry Andric for (Type *SrcTy : Types) {
44390b57cec5SDimitry Andric if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
44400b57cec5SDimitry Andric Formula F = Base;
44410b57cec5SDimitry Andric
44420b57cec5SDimitry Andric // Sometimes SCEV is able to prove zero during ext transform. It may
44430b57cec5SDimitry Andric // happen if SCEV did not do all possible transforms while creating the
44440b57cec5SDimitry Andric // initial node (maybe due to depth limitations), but it can do them while
44450b57cec5SDimitry Andric // taking ext.
44460b57cec5SDimitry Andric if (F.ScaledReg) {
444706c3fb27SDimitry Andric const SCEV *NewScaledReg =
444806c3fb27SDimitry Andric getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE);
444906c3fb27SDimitry Andric if (!NewScaledReg || NewScaledReg->isZero())
44500b57cec5SDimitry Andric continue;
44510b57cec5SDimitry Andric F.ScaledReg = NewScaledReg;
44520b57cec5SDimitry Andric }
44530b57cec5SDimitry Andric bool HasZeroBaseReg = false;
44540b57cec5SDimitry Andric for (const SCEV *&BaseReg : F.BaseRegs) {
445506c3fb27SDimitry Andric const SCEV *NewBaseReg =
445606c3fb27SDimitry Andric getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE);
445706c3fb27SDimitry Andric if (!NewBaseReg || NewBaseReg->isZero()) {
44580b57cec5SDimitry Andric HasZeroBaseReg = true;
44590b57cec5SDimitry Andric break;
44600b57cec5SDimitry Andric }
44610b57cec5SDimitry Andric BaseReg = NewBaseReg;
44620b57cec5SDimitry Andric }
44630b57cec5SDimitry Andric if (HasZeroBaseReg)
44640b57cec5SDimitry Andric continue;
44650b57cec5SDimitry Andric
44660b57cec5SDimitry Andric // TODO: This assumes we've done basic processing on all uses and
44670b57cec5SDimitry Andric // have an idea what the register usage is.
44680b57cec5SDimitry Andric if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
44690b57cec5SDimitry Andric continue;
44700b57cec5SDimitry Andric
44710b57cec5SDimitry Andric F.canonicalize(*L);
44720b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, F);
44730b57cec5SDimitry Andric }
44740b57cec5SDimitry Andric }
44750b57cec5SDimitry Andric }
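
// For example, if the loop has both an i64 and an i32 induction variable and
// i64-to-i32 truncation is free, the i32 use can be rewritten in terms of the
// i64 registers, letting both uses share a single IV.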
44760b57cec5SDimitry Andric
44770b57cec5SDimitry Andric namespace {
44780b57cec5SDimitry Andric
44790b57cec5SDimitry Andric /// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
44800b57cec5SDimitry Andric /// modifications so that the search phase doesn't have to worry about the data
44810b57cec5SDimitry Andric /// structures moving underneath it.
44820b57cec5SDimitry Andric struct WorkItem {
44830b57cec5SDimitry Andric size_t LUIdx;
44840fca6ea1SDimitry Andric Immediate Imm;
44850b57cec5SDimitry Andric const SCEV *OrigReg;
44860b57cec5SDimitry Andric
WorkItem__anonc21373340e11::WorkItem44870fca6ea1SDimitry Andric WorkItem(size_t LI, Immediate I, const SCEV *R)
44880b57cec5SDimitry Andric : LUIdx(LI), Imm(I), OrigReg(R) {}
44890b57cec5SDimitry Andric
44900b57cec5SDimitry Andric void print(raw_ostream &OS) const;
44910b57cec5SDimitry Andric void dump() const;
44920b57cec5SDimitry Andric };
44930b57cec5SDimitry Andric
44940b57cec5SDimitry Andric } // end anonymous namespace
44950b57cec5SDimitry Andric
44960b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & OS) const44970b57cec5SDimitry Andric void WorkItem::print(raw_ostream &OS) const {
44980b57cec5SDimitry Andric OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
44990b57cec5SDimitry Andric << " , add offset " << Imm;
45000b57cec5SDimitry Andric }
45010b57cec5SDimitry Andric
dump() const45020b57cec5SDimitry Andric LLVM_DUMP_METHOD void WorkItem::dump() const {
45030b57cec5SDimitry Andric print(errs()); errs() << '\n';
45040b57cec5SDimitry Andric }
45050b57cec5SDimitry Andric #endif
45060b57cec5SDimitry Andric
45070b57cec5SDimitry Andric /// Look for registers which are a constant distance apart and try to form reuse
45080b57cec5SDimitry Andric /// opportunities between them.
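/// For example (an illustrative sketch): if one use has a formula based on
/// reg({A,+,1}) and another on reg({A+4,+,1}), the second can be rewritten
/// as reg({A,+,1}) plus a constant offset of 4, letting both uses share a
/// single register.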
45090b57cec5SDimitry Andric void LSRInstance::GenerateCrossUseConstantOffsets() {
45100b57cec5SDimitry Andric // Group the registers by their value without any added constant offset.
45110fca6ea1SDimitry Andric using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
45120b57cec5SDimitry Andric
45130b57cec5SDimitry Andric DenseMap<const SCEV *, ImmMapTy> Map;
45140b57cec5SDimitry Andric DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
45150b57cec5SDimitry Andric SmallVector<const SCEV *, 8> Sequence;
45160b57cec5SDimitry Andric for (const SCEV *Use : RegUses) {
45170b57cec5SDimitry Andric const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
45180fca6ea1SDimitry Andric Immediate Imm = ExtractImmediate(Reg, SE);
45190b57cec5SDimitry Andric auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
45200b57cec5SDimitry Andric if (Pair.second)
45210b57cec5SDimitry Andric Sequence.push_back(Reg);
45220b57cec5SDimitry Andric Pair.first->second.insert(std::make_pair(Imm, Use));
45230b57cec5SDimitry Andric UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
45240b57cec5SDimitry Andric }
45250b57cec5SDimitry Andric
45260b57cec5SDimitry Andric // Now examine each set of registers with the same base value. Build up
45270b57cec5SDimitry Andric // a list of work to do and do the work in a separate step so that we're
45280b57cec5SDimitry Andric // not adding formulae and register counts while we're searching.
45290b57cec5SDimitry Andric SmallVector<WorkItem, 32> WorkItems;
45300fca6ea1SDimitry Andric SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
45310fca6ea1SDimitry Andric UniqueItems;
45320b57cec5SDimitry Andric for (const SCEV *Reg : Sequence) {
45330b57cec5SDimitry Andric const ImmMapTy &Imms = Map.find(Reg)->second;
45340b57cec5SDimitry Andric
45350b57cec5SDimitry Andric // It's not worthwhile looking for reuse if there's only one offset.
45360b57cec5SDimitry Andric if (Imms.size() == 1)
45370b57cec5SDimitry Andric continue;
45380b57cec5SDimitry Andric
45390b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
45400b57cec5SDimitry Andric for (const auto &Entry : Imms) dbgs() << ' ' << Entry.first;
45430b57cec5SDimitry Andric dbgs() << '\n');
45440b57cec5SDimitry Andric
45450b57cec5SDimitry Andric // Examine each offset.
45460b57cec5SDimitry Andric for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
45470b57cec5SDimitry Andric J != JE; ++J) {
45480b57cec5SDimitry Andric const SCEV *OrigReg = J->second;
45490b57cec5SDimitry Andric
45500fca6ea1SDimitry Andric Immediate JImm = J->first;
45510b57cec5SDimitry Andric const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
45520b57cec5SDimitry Andric
45530b57cec5SDimitry Andric if (!isa<SCEVConstant>(OrigReg) &&
45540b57cec5SDimitry Andric UsedByIndicesMap[Reg].count() == 1) {
45550b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
45560b57cec5SDimitry Andric << '\n');
45570b57cec5SDimitry Andric continue;
45580b57cec5SDimitry Andric }
45590b57cec5SDimitry Andric
45600b57cec5SDimitry Andric // Conservatively examine offsets between this orig reg and a few selected
45610b57cec5SDimitry Andric // other orig regs.
45620fca6ea1SDimitry Andric Immediate First = Imms.begin()->first;
45630fca6ea1SDimitry Andric Immediate Last = std::prev(Imms.end())->first;
45640fca6ea1SDimitry Andric if (!First.isCompatibleImmediate(Last)) {
45650fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
45660fca6ea1SDimitry Andric << "\n");
45670fca6ea1SDimitry Andric continue;
45680fca6ea1SDimitry Andric }
45690fca6ea1SDimitry Andric // Only scalable if both terms are scalable, or if one is scalable and
45700fca6ea1SDimitry Andric // the other is 0.
45710fca6ea1SDimitry Andric bool Scalable = First.isScalable() || Last.isScalable();
45720fca6ea1SDimitry Andric int64_t FI = First.getKnownMinValue();
45730fca6ea1SDimitry Andric int64_t LI = Last.getKnownMinValue();
45740b57cec5SDimitry Andric // Compute (First + Last) / 2 without overflow using the fact that
45750b57cec5SDimitry Andric // First + Last = 2 * (First & Last) + (First ^ Last).
45760fca6ea1SDimitry Andric int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
45770fca6ea1SDimitry Andric // If the result is negative and FI is odd and LI even (or vice versa),
45780b57cec5SDimitry Andric // we rounded towards -inf. Add 1 in that case, to round towards 0.
45790fca6ea1SDimitry Andric Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
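// A worked example (illustrative): for FI = 7, LI = 2 this gives
// (7 & 2) + ((7 ^ 2) >> 1) = 2 + 2 = 4, which is (7 + 2) / 2 rounded
// towards 0. For FI = -3, LI = 2: (-3 & 2) + ((-3 ^ 2) >> 1) = 0 + (-1) = -1,
// and the correction term ((-3 ^ 2) & ((uint64_t)-1 >> 63)) = (-1 & 1) = 1
// brings Avg to 0, matching (-3 + 2) / 2 rounded towards 0.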
45800b57cec5SDimitry Andric ImmMapTy::const_iterator OtherImms[] = {
45810b57cec5SDimitry Andric Imms.begin(), std::prev(Imms.end()),
45820fca6ea1SDimitry Andric Imms.lower_bound(Immediate::get(Avg, Scalable))};
4583bdd1243dSDimitry Andric for (const auto &M : OtherImms) {
45840b57cec5SDimitry Andric if (M == J || M == JE) continue;
45850fca6ea1SDimitry Andric if (!JImm.isCompatibleImmediate(M->first))
45860fca6ea1SDimitry Andric continue;
45870b57cec5SDimitry Andric
45880b57cec5SDimitry Andric // Compute the difference between the two.
45890fca6ea1SDimitry Andric Immediate Imm = JImm.subUnsigned(M->first);
45900b57cec5SDimitry Andric for (unsigned LUIdx : UsedByIndices.set_bits())
45910b57cec5SDimitry Andric // Make a memo of this use, offset, and register tuple.
45920b57cec5SDimitry Andric if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
45930b57cec5SDimitry Andric WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
45940b57cec5SDimitry Andric }
45950b57cec5SDimitry Andric }
45960b57cec5SDimitry Andric }
45970b57cec5SDimitry Andric
45980b57cec5SDimitry Andric Map.clear();
45990b57cec5SDimitry Andric Sequence.clear();
46000b57cec5SDimitry Andric UsedByIndicesMap.clear();
46010b57cec5SDimitry Andric UniqueItems.clear();
46020b57cec5SDimitry Andric
46030b57cec5SDimitry Andric // Now iterate through the worklist and add new formulae.
46040b57cec5SDimitry Andric for (const WorkItem &WI : WorkItems) {
46050b57cec5SDimitry Andric size_t LUIdx = WI.LUIdx;
46060b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
46070fca6ea1SDimitry Andric Immediate Imm = WI.Imm;
46080b57cec5SDimitry Andric const SCEV *OrigReg = WI.OrigReg;
46090b57cec5SDimitry Andric
46100b57cec5SDimitry Andric Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
46110fca6ea1SDimitry Andric const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
46120b57cec5SDimitry Andric unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
46130b57cec5SDimitry Andric
46140b57cec5SDimitry Andric // TODO: Use a more targeted data structure.
46150b57cec5SDimitry Andric for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
46160b57cec5SDimitry Andric Formula F = LU.Formulae[L];
46170b57cec5SDimitry Andric // FIXME: The code for the scaled and unscaled registers looks
46180b57cec5SDimitry Andric // very similar but slightly different. Investigate if they
46190b57cec5SDimitry Andric // could be merged. That way, we would not have to unscale the
46200b57cec5SDimitry Andric // Formula.
46210b57cec5SDimitry Andric F.unscale();
46220b57cec5SDimitry Andric // Use the immediate in the scaled register.
46230b57cec5SDimitry Andric if (F.ScaledReg == OrigReg) {
46240fca6ea1SDimitry Andric if (!F.BaseOffset.isCompatibleImmediate(Imm))
46250fca6ea1SDimitry Andric continue;
46260fca6ea1SDimitry Andric Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
46270b57cec5SDimitry Andric // Don't create 50 + reg(-50).
46280fca6ea1SDimitry Andric const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
46290fca6ea1SDimitry Andric if (F.referencesReg(S))
46300b57cec5SDimitry Andric continue;
46310b57cec5SDimitry Andric Formula NewF = F;
46320b57cec5SDimitry Andric NewF.BaseOffset = Offset;
46330b57cec5SDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
46340b57cec5SDimitry Andric NewF))
46350b57cec5SDimitry Andric continue;
46360b57cec5SDimitry Andric NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
46370b57cec5SDimitry Andric
46380b57cec5SDimitry Andric // If the new scale is a constant in a register, and adding the constant
46390b57cec5SDimitry Andric // value to the immediate would produce a value closer to zero than the
46400b57cec5SDimitry Andric // immediate itself, then the formula isn't worthwhile.
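// For example (a sketch, with Scale = 1): an immediate of 50 combined with
// NewF.ScaledReg = reg(-30) is skipped, since 50 + (-30) = 20 is closer to
// zero than 50.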
46410fca6ea1SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
46420fca6ea1SDimitry Andric // FIXME: Do we need to do something for scalable immediates here?
46430fca6ea1SDimitry Andric // A scalable SCEV won't be constant, but we might still have
46440fca6ea1SDimitry Andric // something in the offset? Bail out for now to be safe.
46450fca6ea1SDimitry Andric if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
46460b57cec5SDimitry Andric continue;
46470fca6ea1SDimitry Andric if (C->getValue()->isNegative() !=
46480fca6ea1SDimitry Andric (NewF.BaseOffset.isLessThanZero()) &&
46490fca6ea1SDimitry Andric (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
46500fca6ea1SDimitry Andric .ule(std::abs(NewF.BaseOffset.getFixedValue())))
46510fca6ea1SDimitry Andric continue;
46520fca6ea1SDimitry Andric }
46530b57cec5SDimitry Andric
46540b57cec5SDimitry Andric // OK, looks good.
46550b57cec5SDimitry Andric NewF.canonicalize(*this->L);
46560b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, NewF);
46570b57cec5SDimitry Andric } else {
46580b57cec5SDimitry Andric // Use the immediate in a base register.
46590b57cec5SDimitry Andric for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
46600b57cec5SDimitry Andric const SCEV *BaseReg = F.BaseRegs[N];
46610b57cec5SDimitry Andric if (BaseReg != OrigReg)
46620b57cec5SDimitry Andric continue;
46630b57cec5SDimitry Andric Formula NewF = F;
46640fca6ea1SDimitry Andric if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
46650fca6ea1SDimitry Andric !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
46660fca6ea1SDimitry Andric !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
46670fca6ea1SDimitry Andric continue;
46680fca6ea1SDimitry Andric NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
46690b57cec5SDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
46700b57cec5SDimitry Andric LU.Kind, LU.AccessTy, NewF)) {
4671fe6060f1SDimitry Andric if (AMK == TTI::AMK_PostIndexed &&
46720b57cec5SDimitry Andric mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
46730b57cec5SDimitry Andric continue;
46740fca6ea1SDimitry Andric Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
46750fca6ea1SDimitry Andric if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))
46760b57cec5SDimitry Andric continue;
46770b57cec5SDimitry Andric NewF = F;
46780fca6ea1SDimitry Andric NewF.UnfoldedOffset = NewUnfoldedOffset;
46790b57cec5SDimitry Andric }
46800b57cec5SDimitry Andric NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
46810b57cec5SDimitry Andric
46820b57cec5SDimitry Andric // If the new formula has a constant in a register, and adding the
46830b57cec5SDimitry Andric // constant value to the immediate would produce a value closer to
46840b57cec5SDimitry Andric // zero than the immediate itself, then the formula isn't worthwhile.
46850b57cec5SDimitry Andric for (const SCEV *NewReg : NewF.BaseRegs)
46860fca6ea1SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
46870fca6ea1SDimitry Andric if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
46880b57cec5SDimitry Andric goto skip_formula;
46890fca6ea1SDimitry Andric if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
46900fca6ea1SDimitry Andric .abs()
46910fca6ea1SDimitry Andric .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
46920fca6ea1SDimitry Andric (C->getAPInt() + NewF.BaseOffset.getFixedValue())
46930fca6ea1SDimitry Andric .countr_zero() >=
46940fca6ea1SDimitry Andric (unsigned)llvm::countr_zero<uint64_t>(
46950fca6ea1SDimitry Andric NewF.BaseOffset.getFixedValue()))
46960fca6ea1SDimitry Andric goto skip_formula;
46970fca6ea1SDimitry Andric }
46980b57cec5SDimitry Andric
46990b57cec5SDimitry Andric // Ok, looks good.
47000b57cec5SDimitry Andric NewF.canonicalize(*this->L);
47010b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, NewF);
47020b57cec5SDimitry Andric break;
47030b57cec5SDimitry Andric skip_formula:;
47040b57cec5SDimitry Andric }
47050b57cec5SDimitry Andric }
47060b57cec5SDimitry Andric }
47070b57cec5SDimitry Andric }
47080b57cec5SDimitry Andric }
47090b57cec5SDimitry Andric
47100b57cec5SDimitry Andric /// Generate formulae for each use.
47110b57cec5SDimitry Andric void
47120b57cec5SDimitry Andric LSRInstance::GenerateAllReuseFormulae() {
47130b57cec5SDimitry Andric // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
47140b57cec5SDimitry Andric // queries are more precise.
47150b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
47160b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
47170b57cec5SDimitry Andric for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47180b57cec5SDimitry Andric GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
47190b57cec5SDimitry Andric for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47200b57cec5SDimitry Andric GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
47210b57cec5SDimitry Andric }
47220b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
47230b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
47240b57cec5SDimitry Andric for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47250b57cec5SDimitry Andric GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
47260b57cec5SDimitry Andric for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47270b57cec5SDimitry Andric GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
47280b57cec5SDimitry Andric for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47290b57cec5SDimitry Andric GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
47300b57cec5SDimitry Andric for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47310b57cec5SDimitry Andric GenerateScales(LU, LUIdx, LU.Formulae[i]);
47320b57cec5SDimitry Andric }
47330b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
47340b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
47350b57cec5SDimitry Andric for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47360b57cec5SDimitry Andric GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
47370b57cec5SDimitry Andric }
47380b57cec5SDimitry Andric
47390b57cec5SDimitry Andric GenerateCrossUseConstantOffsets();
47400b57cec5SDimitry Andric
47410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"
47420b57cec5SDimitry Andric "After generating reuse formulae:\n";
47430b57cec5SDimitry Andric print_uses(dbgs()));
47440b57cec5SDimitry Andric }
47450b57cec5SDimitry Andric
47460b57cec5SDimitry Andric /// If there are multiple formulae with the same set of registers used
47470b57cec5SDimitry Andric /// by other uses, pick the best one and delete the others.
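/// For instance (a hypothetical sketch): if two formulae for the same use
/// share the key {reg(a)} (the registers also used by other uses), only the
/// one RateFormula considers cheaper is kept.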
47480b57cec5SDimitry Andric void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
47490b57cec5SDimitry Andric DenseSet<const SCEV *> VisitedRegs;
47500b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 16> Regs;
47510b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 16> LoserRegs;
47520b57cec5SDimitry Andric #ifndef NDEBUG
47530b57cec5SDimitry Andric bool ChangedFormulae = false;
47540b57cec5SDimitry Andric #endif
47550b57cec5SDimitry Andric
47560b57cec5SDimitry Andric // Collect the best formula for each unique set of shared registers. This
47570b57cec5SDimitry Andric // is reset for each use.
47580b57cec5SDimitry Andric using BestFormulaeTy =
47590b57cec5SDimitry Andric DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;
47600b57cec5SDimitry Andric
47610b57cec5SDimitry Andric BestFormulaeTy BestFormulae;
47620b57cec5SDimitry Andric
47630b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
47640b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
47650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
47660b57cec5SDimitry Andric dbgs() << '\n');
47670b57cec5SDimitry Andric
47680b57cec5SDimitry Andric bool Any = false;
47690b57cec5SDimitry Andric for (size_t FIdx = 0, NumForms = LU.Formulae.size();
47700b57cec5SDimitry Andric FIdx != NumForms; ++FIdx) {
47710b57cec5SDimitry Andric Formula &F = LU.Formulae[FIdx];
47720b57cec5SDimitry Andric
47730b57cec5SDimitry Andric // Some formulas are instant losers. For example, they may depend on
47740b57cec5SDimitry Andric // nonexistent AddRecs from other loops. These need to be filtered
47750b57cec5SDimitry Andric // immediately, otherwise heuristics could choose them over others leading
47760b57cec5SDimitry Andric // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
47770b57cec5SDimitry Andric // avoids the need to recompute this information across formulae using the
47780b57cec5SDimitry Andric // same bad AddRec. Passing LoserRegs is also essential unless we remove
47790b57cec5SDimitry Andric // the corresponding bad register from the Regs set.
4780fe6060f1SDimitry Andric Cost CostF(L, SE, TTI, AMK);
47810b57cec5SDimitry Andric Regs.clear();
47820b57cec5SDimitry Andric CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
47830b57cec5SDimitry Andric if (CostF.isLoser()) {
47840b57cec5SDimitry Andric // During initial formula generation, undesirable formulae are generated
47850b57cec5SDimitry Andric // by uses within other loops that have some non-trivial address mode or
47860b57cec5SDimitry Andric // use the postinc form of the IV. LSR needs to provide these formulae
47870b57cec5SDimitry Andric // as the basis of rediscovering the desired formula that uses an AddRec
47880b57cec5SDimitry Andric // corresponding to the existing phi. Once all formulae have been
47890b57cec5SDimitry Andric // generated, these initial losers may be pruned.
47900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
47910b57cec5SDimitry Andric dbgs() << "\n");
47920b57cec5SDimitry Andric }
47930b57cec5SDimitry Andric else {
47940b57cec5SDimitry Andric SmallVector<const SCEV *, 4> Key;
47950b57cec5SDimitry Andric for (const SCEV *Reg : F.BaseRegs) {
47960b57cec5SDimitry Andric if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
47970b57cec5SDimitry Andric Key.push_back(Reg);
47980b57cec5SDimitry Andric }
47990b57cec5SDimitry Andric if (F.ScaledReg &&
48000b57cec5SDimitry Andric RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
48010b57cec5SDimitry Andric Key.push_back(F.ScaledReg);
48020b57cec5SDimitry Andric // Unstable sort by host order ok, because this is only used for
48030b57cec5SDimitry Andric // uniquifying.
48040b57cec5SDimitry Andric llvm::sort(Key);
48050b57cec5SDimitry Andric
48060b57cec5SDimitry Andric std::pair<BestFormulaeTy::const_iterator, bool> P =
48070b57cec5SDimitry Andric BestFormulae.insert(std::make_pair(Key, FIdx));
48080b57cec5SDimitry Andric if (P.second)
48090b57cec5SDimitry Andric continue;
48100b57cec5SDimitry Andric
48110b57cec5SDimitry Andric Formula &Best = LU.Formulae[P.first->second];
48120b57cec5SDimitry Andric
4813fe6060f1SDimitry Andric Cost CostBest(L, SE, TTI, AMK);
48140b57cec5SDimitry Andric Regs.clear();
48150b57cec5SDimitry Andric CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
48160b57cec5SDimitry Andric if (CostF.isLess(CostBest))
48170b57cec5SDimitry Andric std::swap(F, Best);
48180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
48190b57cec5SDimitry Andric dbgs() << "\n"
48200b57cec5SDimitry Andric " in favor of formula ";
48210b57cec5SDimitry Andric Best.print(dbgs()); dbgs() << '\n');
48220b57cec5SDimitry Andric }
48230b57cec5SDimitry Andric #ifndef NDEBUG
48240b57cec5SDimitry Andric ChangedFormulae = true;
48250b57cec5SDimitry Andric #endif
48260b57cec5SDimitry Andric LU.DeleteFormula(F);
48270b57cec5SDimitry Andric --FIdx;
48280b57cec5SDimitry Andric --NumForms;
48290b57cec5SDimitry Andric Any = true;
48300b57cec5SDimitry Andric }
48310b57cec5SDimitry Andric
48320b57cec5SDimitry Andric // Now that we've filtered out some formulae, recompute the Regs set.
48330b57cec5SDimitry Andric if (Any)
48340b57cec5SDimitry Andric LU.RecomputeRegs(LUIdx, RegUses);
48350b57cec5SDimitry Andric
48360b57cec5SDimitry Andric // Reset this to prepare for the next use.
48370b57cec5SDimitry Andric BestFormulae.clear();
48380b57cec5SDimitry Andric }
48390b57cec5SDimitry Andric
48400b57cec5SDimitry Andric LLVM_DEBUG(if (ChangedFormulae) {
48410b57cec5SDimitry Andric dbgs() << "\n"
48420b57cec5SDimitry Andric "After filtering out undesirable candidates:\n";
48430b57cec5SDimitry Andric print_uses(dbgs());
48440b57cec5SDimitry Andric });
48450b57cec5SDimitry Andric }
48460b57cec5SDimitry Andric
48470b57cec5SDimitry Andric /// Estimate the worst-case number of solutions the solver might have to
48480b57cec5SDimitry Andric /// consider. It almost never considers this many solutions because it prunes
48490b57cec5SDimitry Andric /// the search space, but the pruning isn't always sufficient.
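/// For example, three uses with 5, 10, and 4 formulae respectively give a
/// worst case of 5 * 10 * 4 = 200 candidate solutions; the running product
/// is capped at ComplexityLimit.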
48500b57cec5SDimitry Andric size_t LSRInstance::EstimateSearchSpaceComplexity() const {
48510b57cec5SDimitry Andric size_t Power = 1;
48520b57cec5SDimitry Andric for (const LSRUse &LU : Uses) {
48530b57cec5SDimitry Andric size_t FSize = LU.Formulae.size();
48540b57cec5SDimitry Andric if (FSize >= ComplexityLimit) {
48550b57cec5SDimitry Andric Power = ComplexityLimit;
48560b57cec5SDimitry Andric break;
48570b57cec5SDimitry Andric }
48580b57cec5SDimitry Andric Power *= FSize;
48590b57cec5SDimitry Andric if (Power >= ComplexityLimit)
48600b57cec5SDimitry Andric break;
48610b57cec5SDimitry Andric }
48620b57cec5SDimitry Andric return Power;
48630b57cec5SDimitry Andric }
48640b57cec5SDimitry Andric
48650b57cec5SDimitry Andric /// When one formula uses a superset of the registers of another formula, it
48660b57cec5SDimitry Andric /// won't help reduce register pressure (though it may not necessarily hurt
48670b57cec5SDimitry Andric /// register pressure); remove it to simplify the system.
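/// For example (an illustrative sketch): a formula using the registers
/// {reg(a), reg(5)} is a register superset of one that folds the constant 5
/// into its immediate and uses only {reg(a)}, so the former is deleted.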
48680b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
48690b57cec5SDimitry Andric if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
48700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
48710b57cec5SDimitry Andric
48720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
48730b57cec5SDimitry Andric "which use a superset of registers used by other "
48740b57cec5SDimitry Andric "formulae.\n");
48750b57cec5SDimitry Andric
48760b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
48770b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
48780b57cec5SDimitry Andric bool Any = false;
48790b57cec5SDimitry Andric for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
48800b57cec5SDimitry Andric Formula &F = LU.Formulae[i];
48810fca6ea1SDimitry Andric if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
48820fca6ea1SDimitry Andric continue;
48830b57cec5SDimitry Andric // Look for a formula with a constant or GV in a register. If the use
48840b57cec5SDimitry Andric // also has a formula with that same value in an immediate field,
48850b57cec5SDimitry Andric // delete the one that uses a register.
48860b57cec5SDimitry Andric for (SmallVectorImpl<const SCEV *>::const_iterator
48870b57cec5SDimitry Andric I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
48880b57cec5SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
48890b57cec5SDimitry Andric Formula NewF = F;
48900b57cec5SDimitry Andric //FIXME: Formulas should store bitwidth to do wrapping properly.
48910b57cec5SDimitry Andric // See PR41034.
48920fca6ea1SDimitry Andric NewF.BaseOffset =
48930fca6ea1SDimitry Andric Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
48940fca6ea1SDimitry Andric (uint64_t)C->getValue()->getSExtValue());
48950b57cec5SDimitry Andric NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
48960b57cec5SDimitry Andric (I - F.BaseRegs.begin()));
48970b57cec5SDimitry Andric if (LU.HasFormulaWithSameRegs(NewF)) {
48980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
48990b57cec5SDimitry Andric dbgs() << '\n');
49000b57cec5SDimitry Andric LU.DeleteFormula(F);
49010b57cec5SDimitry Andric --i;
49020b57cec5SDimitry Andric --e;
49030b57cec5SDimitry Andric Any = true;
49040b57cec5SDimitry Andric break;
49050b57cec5SDimitry Andric }
49060b57cec5SDimitry Andric } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
49070b57cec5SDimitry Andric if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
49080b57cec5SDimitry Andric if (!F.BaseGV) {
49090b57cec5SDimitry Andric Formula NewF = F;
49100b57cec5SDimitry Andric NewF.BaseGV = GV;
49110b57cec5SDimitry Andric NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
49120b57cec5SDimitry Andric (I - F.BaseRegs.begin()));
49130b57cec5SDimitry Andric if (LU.HasFormulaWithSameRegs(NewF)) {
49140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
49150b57cec5SDimitry Andric dbgs() << '\n');
49160b57cec5SDimitry Andric LU.DeleteFormula(F);
49170b57cec5SDimitry Andric --i;
49180b57cec5SDimitry Andric --e;
49190b57cec5SDimitry Andric Any = true;
49200b57cec5SDimitry Andric break;
49210b57cec5SDimitry Andric }
49220b57cec5SDimitry Andric }
49230b57cec5SDimitry Andric }
49240b57cec5SDimitry Andric }
49250b57cec5SDimitry Andric }
49260b57cec5SDimitry Andric if (Any)
49270b57cec5SDimitry Andric LU.RecomputeRegs(LUIdx, RegUses);
49280b57cec5SDimitry Andric }
49290b57cec5SDimitry Andric
49300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
49310b57cec5SDimitry Andric }
49320b57cec5SDimitry Andric }
49330b57cec5SDimitry Andric
49340b57cec5SDimitry Andric /// When there are many registers for expressions like A, A+1, A+2, etc.,
49350b57cec5SDimitry Andric /// allocate a single register for them.
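/// For example (a sketch): after unrolling, uses at A, A+1 and A+2 can all
/// be served by the register for A, with the constant differences folded
/// into the per-use fixup offsets.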
49360b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
49370b57cec5SDimitry Andric if (EstimateSearchSpaceComplexity() < ComplexityLimit)
49380b57cec5SDimitry Andric return;
49390b57cec5SDimitry Andric
49400b57cec5SDimitry Andric LLVM_DEBUG(
49410b57cec5SDimitry Andric dbgs() << "The search space is too complex.\n"
49420b57cec5SDimitry Andric "Narrowing the search space by assuming that uses separated "
49430b57cec5SDimitry Andric "by a constant offset will use the same registers.\n");
49440b57cec5SDimitry Andric
49450b57cec5SDimitry Andric // This is especially useful for unrolled loops.
49460b57cec5SDimitry Andric
49470b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
49480b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
49490b57cec5SDimitry Andric for (const Formula &F : LU.Formulae) {
49500fca6ea1SDimitry Andric if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
49510b57cec5SDimitry Andric continue;
49520b57cec5SDimitry Andric
49530b57cec5SDimitry Andric LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
49540b57cec5SDimitry Andric if (!LUThatHas)
49550b57cec5SDimitry Andric continue;
49560b57cec5SDimitry Andric
49570b57cec5SDimitry Andric if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
49580b57cec5SDimitry Andric LU.Kind, LU.AccessTy))
49590b57cec5SDimitry Andric continue;
49600b57cec5SDimitry Andric
49610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
49620b57cec5SDimitry Andric
49630b57cec5SDimitry Andric LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
49640b57cec5SDimitry Andric
49650b57cec5SDimitry Andric // Transfer the fixups of LU to LUThatHas.
49660b57cec5SDimitry Andric for (LSRFixup &Fixup : LU.Fixups) {
49670b57cec5SDimitry Andric Fixup.Offset += F.BaseOffset;
49680b57cec5SDimitry Andric LUThatHas->pushFixup(Fixup);
49690b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
49700b57cec5SDimitry Andric }
49710b57cec5SDimitry Andric
49720b57cec5SDimitry Andric // Delete formulae from the new use which are no longer legal.
49730b57cec5SDimitry Andric bool Any = false;
49740b57cec5SDimitry Andric for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
49750b57cec5SDimitry Andric Formula &F = LUThatHas->Formulae[i];
49760b57cec5SDimitry Andric if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
49770b57cec5SDimitry Andric LUThatHas->Kind, LUThatHas->AccessTy, F)) {
49780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
49790b57cec5SDimitry Andric LUThatHas->DeleteFormula(F);
49800b57cec5SDimitry Andric --i;
49810b57cec5SDimitry Andric --e;
49820b57cec5SDimitry Andric Any = true;
49830b57cec5SDimitry Andric }
49840b57cec5SDimitry Andric }
49850b57cec5SDimitry Andric
49860b57cec5SDimitry Andric if (Any)
49870b57cec5SDimitry Andric LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
49880b57cec5SDimitry Andric
49890b57cec5SDimitry Andric // Delete the old use.
49900b57cec5SDimitry Andric DeleteUse(LU, LUIdx);
49910b57cec5SDimitry Andric --LUIdx;
49920b57cec5SDimitry Andric --NumUses;
49930b57cec5SDimitry Andric break;
49940b57cec5SDimitry Andric }
49950b57cec5SDimitry Andric }
49960b57cec5SDimitry Andric
49970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
49980b57cec5SDimitry Andric }
49990b57cec5SDimitry Andric
50000b57cec5SDimitry Andric /// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
50010b57cec5SDimitry Andric /// we've done more filtering, as it may be able to find more formulae to
50020b57cec5SDimitry Andric /// eliminate.
50030b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
50040b57cec5SDimitry Andric if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
50050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
50060b57cec5SDimitry Andric
50070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
50080b57cec5SDimitry Andric "undesirable dedicated registers.\n");
50090b57cec5SDimitry Andric
50100b57cec5SDimitry Andric FilterOutUndesirableDedicatedRegisters();
50110b57cec5SDimitry Andric
50120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
50130b57cec5SDimitry Andric }
50140b57cec5SDimitry Andric }
50150b57cec5SDimitry Andric
50160b57cec5SDimitry Andric /// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
50170b57cec5SDimitry Andric /// pick the best one and delete the others.
50180b57cec5SDimitry Andric /// This narrowing heuristic aims to keep as many formulae with different
50190b57cec5SDimitry Andric /// Scale and ScaledReg pairs as possible while narrowing the search space.
50200b57cec5SDimitry Andric /// The benefit is that it is more likely to find a better solution from a
50210b57cec5SDimitry Andric /// formula set with more Scale and ScaledReg variations than from one where
50220b57cec5SDimitry Andric /// all formulae share the same Scale and ScaledReg. The winner-reg-picking
50230b57cec5SDimitry Andric /// heuristic will often keep the formulae with the same Scale and ScaledReg
50240b57cec5SDimitry Andric /// and filter out the others, and we want to avoid that if possible.
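/// For example (a hypothetical sketch): of two formulae that both have
/// ScaledReg = {a,+,1} and Scale = 2, only the better-rated one survives,
/// while a formula with a different (ScaledReg, Scale) pair is kept
/// alongside it.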
50250b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
50260b57cec5SDimitry Andric if (EstimateSearchSpaceComplexity() < ComplexityLimit)
50270b57cec5SDimitry Andric return;
50280b57cec5SDimitry Andric
50290b57cec5SDimitry Andric LLVM_DEBUG(
50300b57cec5SDimitry Andric dbgs() << "The search space is too complex.\n"
50310b57cec5SDimitry Andric "Narrowing the search space by choosing the best Formula "
50320b57cec5SDimitry Andric "from the Formulae with the same Scale and ScaledReg.\n");
50330b57cec5SDimitry Andric
50340b57cec5SDimitry Andric // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
50350b57cec5SDimitry Andric using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
50360b57cec5SDimitry Andric
50370b57cec5SDimitry Andric BestFormulaeTy BestFormulae;
50380b57cec5SDimitry Andric #ifndef NDEBUG
50390b57cec5SDimitry Andric bool ChangedFormulae = false;
50400b57cec5SDimitry Andric #endif
50410b57cec5SDimitry Andric DenseSet<const SCEV *> VisitedRegs;
50420b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 16> Regs;
50430b57cec5SDimitry Andric
50440b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
50450b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
50460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
50470b57cec5SDimitry Andric dbgs() << '\n');
50480b57cec5SDimitry Andric
50490b57cec5SDimitry Andric // Return true if Formula FA is better than Formula FB.
50500b57cec5SDimitry Andric auto IsBetterThan = [&](Formula &FA, Formula &FB) {
50510b57cec5SDimitry Andric // First we will try to choose the Formula with fewer new registers.
50520b57cec5SDimitry Andric // For a register used by the current Formula, the more that register is
50530b57cec5SDimitry Andric // shared among LSRUses, the less it increases the register-number
50540b57cec5SDimitry Andric // counter of the formula.
50550b57cec5SDimitry Andric size_t FARegNum = 0;
50560b57cec5SDimitry Andric for (const SCEV *Reg : FA.BaseRegs) {
50570b57cec5SDimitry Andric const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
50580b57cec5SDimitry Andric FARegNum += (NumUses - UsedByIndices.count() + 1);
50590b57cec5SDimitry Andric }
50600b57cec5SDimitry Andric size_t FBRegNum = 0;
50610b57cec5SDimitry Andric for (const SCEV *Reg : FB.BaseRegs) {
50620b57cec5SDimitry Andric const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
50630b57cec5SDimitry Andric FBRegNum += (NumUses - UsedByIndices.count() + 1);
50640b57cec5SDimitry Andric }
50650b57cec5SDimitry Andric if (FARegNum != FBRegNum)
50660b57cec5SDimitry Andric return FARegNum < FBRegNum;
50670b57cec5SDimitry Andric
50680b57cec5SDimitry Andric // If the new register numbers are the same, choose the Formula with
50690b57cec5SDimitry Andric // less Cost.
5070fe6060f1SDimitry Andric Cost CostFA(L, SE, TTI, AMK);
5071fe6060f1SDimitry Andric Cost CostFB(L, SE, TTI, AMK);
50720b57cec5SDimitry Andric Regs.clear();
50730b57cec5SDimitry Andric CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
50740b57cec5SDimitry Andric Regs.clear();
50750b57cec5SDimitry Andric CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
50760b57cec5SDimitry Andric return CostFA.isLess(CostFB);
50770b57cec5SDimitry Andric };
50780b57cec5SDimitry Andric
50790b57cec5SDimitry Andric bool Any = false;
50800b57cec5SDimitry Andric for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
50810b57cec5SDimitry Andric ++FIdx) {
50820b57cec5SDimitry Andric Formula &F = LU.Formulae[FIdx];
50830b57cec5SDimitry Andric if (!F.ScaledReg)
50840b57cec5SDimitry Andric continue;
50850b57cec5SDimitry Andric auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
50860b57cec5SDimitry Andric if (P.second)
50870b57cec5SDimitry Andric continue;
50880b57cec5SDimitry Andric
50890b57cec5SDimitry Andric Formula &Best = LU.Formulae[P.first->second];
50900b57cec5SDimitry Andric if (IsBetterThan(F, Best))
50910b57cec5SDimitry Andric std::swap(F, Best);
50920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
50930b57cec5SDimitry Andric dbgs() << "\n"
50940b57cec5SDimitry Andric " in favor of formula ";
50950b57cec5SDimitry Andric Best.print(dbgs()); dbgs() << '\n');
50960b57cec5SDimitry Andric #ifndef NDEBUG
50970b57cec5SDimitry Andric ChangedFormulae = true;
50980b57cec5SDimitry Andric #endif
50990b57cec5SDimitry Andric LU.DeleteFormula(F);
51000b57cec5SDimitry Andric --FIdx;
51010b57cec5SDimitry Andric --NumForms;
51020b57cec5SDimitry Andric Any = true;
51030b57cec5SDimitry Andric }
51040b57cec5SDimitry Andric if (Any)
51050b57cec5SDimitry Andric LU.RecomputeRegs(LUIdx, RegUses);
51060b57cec5SDimitry Andric
51070b57cec5SDimitry Andric // Reset this to prepare for the next use.
51080b57cec5SDimitry Andric BestFormulae.clear();
51090b57cec5SDimitry Andric }
51100b57cec5SDimitry Andric
51110b57cec5SDimitry Andric LLVM_DEBUG(if (ChangedFormulae) {
51120b57cec5SDimitry Andric dbgs() << "\n"
51130b57cec5SDimitry Andric "After filtering out undesirable candidates:\n";
51140b57cec5SDimitry Andric print_uses(dbgs());
51150b57cec5SDimitry Andric });
51160b57cec5SDimitry Andric }
51170b57cec5SDimitry Andric
51185ffd83dbSDimitry Andric /// If we are over the complexity limit, filter any post-inc-preferring
51195ffd83dbSDimitry Andric /// uses down to only post-inc formulae.
51205ffd83dbSDimitry Andric void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5121fe6060f1SDimitry Andric if (AMK != TTI::AMK_PostIndexed)
51225ffd83dbSDimitry Andric return;
51235ffd83dbSDimitry Andric if (EstimateSearchSpaceComplexity() < ComplexityLimit)
51245ffd83dbSDimitry Andric return;
51255ffd83dbSDimitry Andric
51265ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
51275ffd83dbSDimitry Andric "Narrowing the search space by choosing the lowest "
51285ffd83dbSDimitry Andric "register Formula for PostInc Uses.\n");
51295ffd83dbSDimitry Andric
51305ffd83dbSDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
51315ffd83dbSDimitry Andric LSRUse &LU = Uses[LUIdx];
51325ffd83dbSDimitry Andric
51335ffd83dbSDimitry Andric if (LU.Kind != LSRUse::Address)
51345ffd83dbSDimitry Andric continue;
51355ffd83dbSDimitry Andric if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
51365ffd83dbSDimitry Andric !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
51375ffd83dbSDimitry Andric continue;
51385ffd83dbSDimitry Andric
51395ffd83dbSDimitry Andric size_t MinRegs = std::numeric_limits<size_t>::max();
51405ffd83dbSDimitry Andric for (const Formula &F : LU.Formulae)
51415ffd83dbSDimitry Andric MinRegs = std::min(F.getNumRegs(), MinRegs);
51425ffd83dbSDimitry Andric
51435ffd83dbSDimitry Andric bool Any = false;
51445ffd83dbSDimitry Andric for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
51455ffd83dbSDimitry Andric ++FIdx) {
51465ffd83dbSDimitry Andric Formula &F = LU.Formulae[FIdx];
51475ffd83dbSDimitry Andric if (F.getNumRegs() > MinRegs) {
51485ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
51495ffd83dbSDimitry Andric dbgs() << "\n");
51505ffd83dbSDimitry Andric LU.DeleteFormula(F);
51515ffd83dbSDimitry Andric --FIdx;
51525ffd83dbSDimitry Andric --NumForms;
51535ffd83dbSDimitry Andric Any = true;
51545ffd83dbSDimitry Andric }
51555ffd83dbSDimitry Andric }
51565ffd83dbSDimitry Andric if (Any)
51575ffd83dbSDimitry Andric LU.RecomputeRegs(LUIdx, RegUses);
51585ffd83dbSDimitry Andric
51595ffd83dbSDimitry Andric if (EstimateSearchSpaceComplexity() < ComplexityLimit)
51605ffd83dbSDimitry Andric break;
51615ffd83dbSDimitry Andric }
51625ffd83dbSDimitry Andric
51635ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
51645ffd83dbSDimitry Andric }
51655ffd83dbSDimitry Andric
51660b57cec5SDimitry Andric /// This function deletes formulas with a high expected register count.
51670b57cec5SDimitry Andric /// Assuming we don't know the value of each formula (all inefficient ones
51680b57cec5SDimitry Andric /// have already been deleted), compute for each register the probability
51690b57cec5SDimitry Andric /// of it not being selected.
51700b57cec5SDimitry Andric /// For example,
51710b57cec5SDimitry Andric /// Use1:
51720b57cec5SDimitry Andric /// reg(a) + reg({0,+,1})
51730b57cec5SDimitry Andric /// reg(a) + reg({-1,+,1}) + 1
51740b57cec5SDimitry Andric /// reg({a,+,1})
51750b57cec5SDimitry Andric /// Use2:
51760b57cec5SDimitry Andric /// reg(b) + reg({0,+,1})
51770b57cec5SDimitry Andric /// reg(b) + reg({-1,+,1}) + 1
51780b57cec5SDimitry Andric /// reg({b,+,1})
51790b57cec5SDimitry Andric /// Use3:
51800b57cec5SDimitry Andric /// reg(c) + reg(b) + reg({0,+,1})
51810b57cec5SDimitry Andric /// reg(c) + reg({b,+,1})
51820b57cec5SDimitry Andric ///
51830b57cec5SDimitry Andric /// Probability of not selecting
51840b57cec5SDimitry Andric /// Use1 Use2 Use3
51850b57cec5SDimitry Andric /// reg(a) (1/3) * 1 * 1
51860b57cec5SDimitry Andric /// reg(b) 1 * (1/3) * (1/2)
51870b57cec5SDimitry Andric /// reg({0,+,1}) (2/3) * (2/3) * (1/2)
51880b57cec5SDimitry Andric /// reg({-1,+,1}) (2/3) * (2/3) * 1
51890b57cec5SDimitry Andric /// reg({a,+,1}) (2/3) * 1 * 1
51900b57cec5SDimitry Andric /// reg({b,+,1}) 1 * (2/3) * (2/3)
51910b57cec5SDimitry Andric /// reg(c) 1 * 1 * 0
51920b57cec5SDimitry Andric ///
51930b57cec5SDimitry Andric /// Now compute the expected register count for each formula. Note that for
51940b57cec5SDimitry Andric /// each use we exclude the probability of not selecting for that use. For
51950b57cec5SDimitry Andric /// example, for Use1 the probability for reg(a) would be just 1 * 1
51960b57cec5SDimitry Andric /// (excluding the probability 1/3 of not selecting for Use1).
51970b57cec5SDimitry Andric /// Use1:
51980b57cec5SDimitry Andric /// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
51990b57cec5SDimitry Andric /// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
52000b57cec5SDimitry Andric /// reg({a,+,1}) 1
52010b57cec5SDimitry Andric /// Use2:
52020b57cec5SDimitry Andric /// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
52030b57cec5SDimitry Andric /// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
52040b57cec5SDimitry Andric /// reg({b,+,1}) 2/3
52050b57cec5SDimitry Andric /// Use3:
52060b57cec5SDimitry Andric /// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
52070b57cec5SDimitry Andric /// reg(c) + reg({b,+,1}) 1 + 2/3
52080b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
52090b57cec5SDimitry Andric if (EstimateSearchSpaceComplexity() < ComplexityLimit)
52100b57cec5SDimitry Andric return;
52110b57cec5SDimitry Andric // Ok, we have too many formulae on our hands to conveniently handle.
52120b57cec5SDimitry Andric // Use a rough heuristic to thin out the list.
52130b57cec5SDimitry Andric
52140b57cec5SDimitry Andric // Set of Regs which will be 100% used in the final solution, i.e. used in
52150b57cec5SDimitry Andric // each formula of a solution (in the example above this is reg(c)).
52160b57cec5SDimitry Andric // We can skip them in calculations.
52170b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 4> UniqRegs;
52180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
52190b57cec5SDimitry Andric
52200b57cec5SDimitry Andric // Map each register to the probability of it not being selected.
52210b57cec5SDimitry Andric DenseMap <const SCEV *, float> RegNumMap;
52220b57cec5SDimitry Andric for (const SCEV *Reg : RegUses) {
52230b57cec5SDimitry Andric if (UniqRegs.count(Reg))
52240b57cec5SDimitry Andric continue;
52250b57cec5SDimitry Andric float PNotSel = 1;
52260b57cec5SDimitry Andric for (const LSRUse &LU : Uses) {
52270b57cec5SDimitry Andric if (!LU.Regs.count(Reg))
52280b57cec5SDimitry Andric continue;
52290b57cec5SDimitry Andric float P = LU.getNotSelectedProbability(Reg);
52300b57cec5SDimitry Andric if (P != 0.0)
52310b57cec5SDimitry Andric PNotSel *= P;
52320b57cec5SDimitry Andric else
52330b57cec5SDimitry Andric UniqRegs.insert(Reg);
52340b57cec5SDimitry Andric }
52350b57cec5SDimitry Andric RegNumMap.insert(std::make_pair(Reg, PNotSel));
52360b57cec5SDimitry Andric }
52370b57cec5SDimitry Andric
52380b57cec5SDimitry Andric LLVM_DEBUG(
52390b57cec5SDimitry Andric dbgs() << "Narrowing the search space by deleting costly formulas\n");
52400b57cec5SDimitry Andric
52410b57cec5SDimitry Andric // Delete formulas whose expected register count is high.
52420b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
52430b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
52440b57cec5SDimitry Andric // If nothing to delete - continue.
52450b57cec5SDimitry Andric if (LU.Formulae.size() < 2)
52460b57cec5SDimitry Andric continue;
52470b57cec5SDimitry Andric // This is a temporary solution to test performance. Float should be
52480b57cec5SDimitry Andric // replaced with a rounding-independent type (based on integers) to avoid
52490b57cec5SDimitry Andric // different results for different target builds.
52500b57cec5SDimitry Andric float FMinRegNum = LU.Formulae[0].getNumRegs();
52510b57cec5SDimitry Andric float FMinARegNum = LU.Formulae[0].getNumRegs();
52520b57cec5SDimitry Andric size_t MinIdx = 0;
52530b57cec5SDimitry Andric for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
52540b57cec5SDimitry Andric Formula &F = LU.Formulae[i];
52550b57cec5SDimitry Andric float FRegNum = 0;
52560b57cec5SDimitry Andric float FARegNum = 0;
52570b57cec5SDimitry Andric for (const SCEV *BaseReg : F.BaseRegs) {
52580b57cec5SDimitry Andric if (UniqRegs.count(BaseReg))
52590b57cec5SDimitry Andric continue;
52600b57cec5SDimitry Andric FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
52610b57cec5SDimitry Andric if (isa<SCEVAddRecExpr>(BaseReg))
52620b57cec5SDimitry Andric FARegNum +=
52630b57cec5SDimitry Andric RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
52640b57cec5SDimitry Andric }
52650b57cec5SDimitry Andric if (const SCEV *ScaledReg = F.ScaledReg) {
52660b57cec5SDimitry Andric if (!UniqRegs.count(ScaledReg)) {
52670b57cec5SDimitry Andric FRegNum +=
52680b57cec5SDimitry Andric RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
52690b57cec5SDimitry Andric if (isa<SCEVAddRecExpr>(ScaledReg))
52700b57cec5SDimitry Andric FARegNum +=
52710b57cec5SDimitry Andric RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
52720b57cec5SDimitry Andric }
52730b57cec5SDimitry Andric }
52740b57cec5SDimitry Andric if (FMinRegNum > FRegNum ||
52750b57cec5SDimitry Andric (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
52760b57cec5SDimitry Andric FMinRegNum = FRegNum;
52770b57cec5SDimitry Andric FMinARegNum = FARegNum;
52780b57cec5SDimitry Andric MinIdx = i;
52790b57cec5SDimitry Andric }
52800b57cec5SDimitry Andric }
52810b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
52820b57cec5SDimitry Andric dbgs() << " with min reg num " << FMinRegNum << '\n');
52830b57cec5SDimitry Andric if (MinIdx != 0)
52840b57cec5SDimitry Andric std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
52850b57cec5SDimitry Andric while (LU.Formulae.size() != 1) {
52860b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
52870b57cec5SDimitry Andric dbgs() << '\n');
52880b57cec5SDimitry Andric LU.Formulae.pop_back();
52890b57cec5SDimitry Andric }
52900b57cec5SDimitry Andric LU.RecomputeRegs(LUIdx, RegUses);
52910b57cec5SDimitry Andric assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
52920b57cec5SDimitry Andric Formula &F = LU.Formulae[0];
52930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
52940b57cec5SDimitry Andric // When we choose the formula, the regs become unique.
52950b57cec5SDimitry Andric UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
52960b57cec5SDimitry Andric if (F.ScaledReg)
52970b57cec5SDimitry Andric UniqRegs.insert(F.ScaledReg);
52980b57cec5SDimitry Andric }
52990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
53000b57cec5SDimitry Andric }
53010b57cec5SDimitry Andric
530206c3fb27SDimitry Andric // Check if Best and Reg are SCEVs separated by a constant amount C, and if
530306c3fb27SDimitry Andric // so, whether the addressing offset +C would be legal where the negative
530406c3fb27SDimitry Andric // offset -C is not.
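// For instance (illustrative values): with Best = {x+16,+,1} and
// Reg = {x,+,1}, Diff is 16; on a target where [base + 16] is a legal
// addressing mode but [base - 16] is not, Reg is the simpler base.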
530506c3fb27SDimitry Andric static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
530606c3fb27SDimitry Andric ScalarEvolution &SE, const SCEV *Best,
530706c3fb27SDimitry Andric const SCEV *Reg,
530806c3fb27SDimitry Andric MemAccessTy AccessType) {
530906c3fb27SDimitry Andric if (Best->getType() != Reg->getType() ||
531006c3fb27SDimitry Andric (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
531106c3fb27SDimitry Andric cast<SCEVAddRecExpr>(Best)->getLoop() !=
531206c3fb27SDimitry Andric cast<SCEVAddRecExpr>(Reg)->getLoop()))
531306c3fb27SDimitry Andric return false;
531406c3fb27SDimitry Andric const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
531506c3fb27SDimitry Andric if (!Diff)
531606c3fb27SDimitry Andric return false;
531706c3fb27SDimitry Andric
531806c3fb27SDimitry Andric return TTI.isLegalAddressingMode(
531906c3fb27SDimitry Andric AccessType.MemTy, /*BaseGV=*/nullptr,
532006c3fb27SDimitry Andric /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
532106c3fb27SDimitry Andric /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
532206c3fb27SDimitry Andric !TTI.isLegalAddressingMode(
532306c3fb27SDimitry Andric AccessType.MemTy, /*BaseGV=*/nullptr,
532406c3fb27SDimitry Andric /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
532506c3fb27SDimitry Andric /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
532606c3fb27SDimitry Andric }
532706c3fb27SDimitry Andric
53280b57cec5SDimitry Andric /// Pick a register which seems likely to be profitable, and then in any use
53290b57cec5SDimitry Andric /// which has any reference to that register, delete all formulae which do not
53300b57cec5SDimitry Andric /// reference that register.
53310b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
53320b57cec5SDimitry Andric // With all other options exhausted, loop until the system is simple
53330b57cec5SDimitry Andric // enough to handle.
53340b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 4> Taken;
53350b57cec5SDimitry Andric while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
53360b57cec5SDimitry Andric // Ok, we have too many formulae on our hands to conveniently handle.
53370b57cec5SDimitry Andric // Use a rough heuristic to thin out the list.
53380b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
53390b57cec5SDimitry Andric
53400b57cec5SDimitry Andric // Pick the register which is used by the most LSRUses, which is likely
53410b57cec5SDimitry Andric // to be a good reuse register candidate.
53420b57cec5SDimitry Andric const SCEV *Best = nullptr;
53430b57cec5SDimitry Andric unsigned BestNum = 0;
53440b57cec5SDimitry Andric for (const SCEV *Reg : RegUses) {
53450b57cec5SDimitry Andric if (Taken.count(Reg))
53460b57cec5SDimitry Andric continue;
53470b57cec5SDimitry Andric if (!Best) {
53480b57cec5SDimitry Andric Best = Reg;
53490b57cec5SDimitry Andric BestNum = RegUses.getUsedByIndices(Reg).count();
53500b57cec5SDimitry Andric } else {
53510b57cec5SDimitry Andric unsigned Count = RegUses.getUsedByIndices(Reg).count();
53520b57cec5SDimitry Andric if (Count > BestNum) {
53530b57cec5SDimitry Andric Best = Reg;
53540b57cec5SDimitry Andric BestNum = Count;
53550b57cec5SDimitry Andric }
535606c3fb27SDimitry Andric
535706c3fb27SDimitry Andric // If the scores are the same, but the Reg is simpler for the target
535806c3fb27SDimitry Andric // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
535906c3fb27SDimitry Andric // handle +C but not -C), opt for the simpler formula.
536006c3fb27SDimitry Andric if (Count == BestNum) {
536106c3fb27SDimitry Andric int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
536206c3fb27SDimitry Andric if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
536306c3fb27SDimitry Andric IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
536406c3fb27SDimitry Andric Uses[LUIdx].AccessTy)) {
536506c3fb27SDimitry Andric Best = Reg;
536606c3fb27SDimitry Andric BestNum = Count;
536706c3fb27SDimitry Andric }
536806c3fb27SDimitry Andric }
53690b57cec5SDimitry Andric }
53700b57cec5SDimitry Andric }
53718bcb0991SDimitry Andric assert(Best && "Failed to find best LSRUse candidate");
53720b57cec5SDimitry Andric
53730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
53740b57cec5SDimitry Andric << " will yield profitable reuse.\n");
53750b57cec5SDimitry Andric Taken.insert(Best);
53760b57cec5SDimitry Andric
53770b57cec5SDimitry Andric // In any use with formulae which references this register, delete formulae
53780b57cec5SDimitry Andric // which don't reference it.
53790b57cec5SDimitry Andric for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
53800b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx];
53810b57cec5SDimitry Andric if (!LU.Regs.count(Best)) continue;
53820b57cec5SDimitry Andric
53830b57cec5SDimitry Andric bool Any = false;
53840b57cec5SDimitry Andric for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
53850b57cec5SDimitry Andric Formula &F = LU.Formulae[i];
53860b57cec5SDimitry Andric if (!F.referencesReg(Best)) {
53870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
53880b57cec5SDimitry Andric LU.DeleteFormula(F);
53890b57cec5SDimitry Andric --e;
53900b57cec5SDimitry Andric --i;
53910b57cec5SDimitry Andric Any = true;
53920b57cec5SDimitry Andric assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
53930b57cec5SDimitry Andric continue;
53940b57cec5SDimitry Andric }
53950b57cec5SDimitry Andric }
53960b57cec5SDimitry Andric
53970b57cec5SDimitry Andric if (Any)
53980b57cec5SDimitry Andric LU.RecomputeRegs(LUIdx, RegUses);
53990b57cec5SDimitry Andric }
54000b57cec5SDimitry Andric
54010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
54020b57cec5SDimitry Andric }
54030b57cec5SDimitry Andric }
54040b57cec5SDimitry Andric
54050b57cec5SDimitry Andric /// If there are an extraordinary number of formulae to choose from, use some
54060b57cec5SDimitry Andric /// rough heuristics to prune down the number of formulae. This keeps the main
54070b57cec5SDimitry Andric /// solver from taking an extraordinary amount of time in some worst-case
54080b57cec5SDimitry Andric /// scenarios.
54090b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
54100b57cec5SDimitry Andric NarrowSearchSpaceByDetectingSupersets();
54110b57cec5SDimitry Andric NarrowSearchSpaceByCollapsingUnrolledCode();
54120b57cec5SDimitry Andric NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
54130b57cec5SDimitry Andric if (FilterSameScaledReg)
54140b57cec5SDimitry Andric NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
54155ffd83dbSDimitry Andric NarrowSearchSpaceByFilterPostInc();
54160b57cec5SDimitry Andric if (LSRExpNarrow)
54170b57cec5SDimitry Andric NarrowSearchSpaceByDeletingCostlyFormulas();
54180b57cec5SDimitry Andric else
54190b57cec5SDimitry Andric NarrowSearchSpaceByPickingWinnerRegs();
54200b57cec5SDimitry Andric }
54210b57cec5SDimitry Andric
54220b57cec5SDimitry Andric /// This is the recursive solver.
54230b57cec5SDimitry Andric void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
54240b57cec5SDimitry Andric Cost &SolutionCost,
54250b57cec5SDimitry Andric SmallVectorImpl<const Formula *> &Workspace,
54260b57cec5SDimitry Andric const Cost &CurCost,
54270b57cec5SDimitry Andric const SmallPtrSet<const SCEV *, 16> &CurRegs,
54280b57cec5SDimitry Andric DenseSet<const SCEV *> &VisitedRegs) const {
54290b57cec5SDimitry Andric // Some ideas:
54300b57cec5SDimitry Andric // - prune more:
54310b57cec5SDimitry Andric // - use more aggressive filtering
54320b57cec5SDimitry Andric // - sort the formula so that the most profitable solutions are found first
54330b57cec5SDimitry Andric // - sort the uses too
54340b57cec5SDimitry Andric // - search faster:
54350b57cec5SDimitry Andric // - don't compute a cost, and then compare. compare while computing a cost
54360b57cec5SDimitry Andric // and bail early.
54370b57cec5SDimitry Andric // - track register sets with SmallBitVector
54380b57cec5SDimitry Andric
54390b57cec5SDimitry Andric const LSRUse &LU = Uses[Workspace.size()];
54400b57cec5SDimitry Andric
54410b57cec5SDimitry Andric // If this use references any register that's already a part of the
54420b57cec5SDimitry Andric // in-progress solution, consider it a requirement that a formula must
54430b57cec5SDimitry Andric // reference that register in order to be considered. This prunes out
54440b57cec5SDimitry Andric // unprofitable searching.
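  // Illustrative example: if the partial solution already materializes
  // {0,+,4}<%L>, a formula for this use written in terms of {0,+,4}<%L> is
  // preferred over one that would introduce a brand new register, since
  // reusing an already-live register is usually the cheaper outcome.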
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    // Ignore formulae which may not be ideal in terms of register reuse of
    // ReqRegs. The formula should use all required registers before
    // introducing new ones.
    // This can sometimes (notably when trying to favour postinc) lead to
    // sub-optimal decisions. In those cases it is best left to the cost
    // modelling to get right.
    if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
      int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
      for (const SCEV *Reg : ReqRegs) {
        if ((F.ScaledReg && F.ScaledReg == Reg) ||
            is_contained(F.BaseRegs, Reg)) {
          --NumReqRegsToFind;
          if (NumReqRegsToFind == 0)
            break;
        }
      }
      if (NumReqRegsToFind != 0) {
        // If none of the formulae satisfied the required registers, then we
        // could clear ReqRegs and try again. Currently, we simply give up in
        // this case.
        continue;
      }
    }

    // Evaluate the cost of the current formula. If it's already worse than
    // the current best, prune the search at that point.
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
    if (NewCost.isLess(SolutionCost)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
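        // Pruning note (a heuristic, not a cost judgment): recording this
        // register below lets RateFormula rate later formulae that reuse it
        // as a loss, so the search does not retry branches rooted at a
        // single register it has already explored from the top level.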
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs()
                       << "- " << *S << "\n";
                   dbgs() << '\n');

        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}

/// Choose one formula from each use. Return the results in the given Solution
/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
    return;
  }

  // Ok, we've now made all our decisions.
  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               dbgs() << " ";
               Uses[i].print(dbgs());
               dbgs() << "\n"
                         " ";
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });

  assert(Solution.size() == Uses.size() && "Malformed solution!");

  const bool EnableDropUnprofitableSolution = [&] {
    switch (AllowDropSolutionIfLessProfitable) {
    case cl::BOU_TRUE:
      return true;
    case cl::BOU_FALSE:
      return false;
    case cl::BOU_UNSET:
      return TTI.shouldDropLSRSolutionIfLessProfitable();
    }
    llvm_unreachable("Unhandled cl::boolOrDefault enum");
  }();

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      LLVM_DEBUG(
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
    else {
      LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
                           "solution, dropping LSR solution.\n";);
      Solution.clear();
    }
  }
}
/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as
/// far as we can go while still being dominated by the input positions. This
/// helps canonicalize the insert position, which encourages sharing.
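/// For example (illustrative): if every input is defined in the loop
/// preheader but the tentative position sits deep inside nested control
/// flow, hoisting to the outermost block still dominated by the inputs lets
/// several expansions of the same expression share a single instruction.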
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
                                 const SmallVectorImpl<Instruction *> &Inputs)
    const {
  Instruction *Tentative = &*IP;
  while (true) {
    bool AllDominate = true;
    Instruction *BetterPos = nullptr;
    // Don't bother attempting to insert before a catchswitch; its basic
    // block cannot contain any other non-PHI instructions.
    if (isa<CatchSwitchInst>(Tentative))
      return IP;

    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // Attempt to find an insert position in the middle of the block,
      // instead of at the end, so that it can be used for other expansions.
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
        BetterPos = &*std::next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos->getIterator();
    else
      IP = Tentative->getIterator();

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

    BasicBlock *IDom;
    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
      if (!Rung) return IP;
      Rung = Rung->getIDom();
      if (!Rung) return IP;
      IDom = Rung->getBlock();

      // Don't climb into a loop though.
      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
        break;
    }

    Tentative = IDom->getTerminator();
  }

  return IP;
}

/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
  // Collect some instructions which must be dominated by the
  // expanding replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(I);
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
            dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    else
      Inputs.push_back(IVIncInsertPos);
  }
  // The expansion must also be dominated by the increment positions of any
  // loops for which it is using post-inc mode.
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;

    // Be dominated by the loop exit.
    SmallVector<BasicBlock *, 4> ExitingBlocks;
    PIL->getExitingBlocks(ExitingBlocks);
    if (!ExitingBlocks.empty()) {
      BasicBlock *BB = ExitingBlocks[0];
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
      Inputs.push_back(BB->getTerminator());
    }
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);

  // Don't insert instructions before PHI nodes.
  while (isa<PHINode>(IP)) ++IP;

  // Ignore landingpad instructions.
  while (IP->isEHPad()) ++IP;

  // Ignore debug intrinsics.
  while (isa<DbgInfoIntrinsic>(IP)) ++IP;

  // Set IP below instructions recently inserted by SCEVExpander. This keeps
  // the IP consistent across expansions and allows the previously inserted
  // instructions to be reused by subsequent expansion.
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
    ++IP;

  return IP;
}

/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding").
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // Determine an input position which will be dominated by the operands and
  // which will dominate the result.
  IP = AdjustInsertPositionForExpand(IP, LF, LU);
  Rewriter.setInsertPoint(&*IP);

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
  Rewriter.setPostInc(LF.PostIncLoops);

  // This is the type that the user actually needs.
  Type *OpTy = LF.OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  Type *Ty = F.getType();
  if (!Ty)
    // No type known; just expand directly to the ultimate type.
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;
  // This is the type to do integer arithmetic in.
  Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;
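  // Illustrative example: for a formula such as reg(%a) + 2*reg(%b) + 4,
  // Ops accumulates the expanded %a, the scaled %b, and the immediate 4,
  // and the pieces are summed into a single value at the end.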

  // Expand the BaseRegs portion.
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");

    // If we're expanding for a post-inc user, make the post-inc adjustment.
    Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

    // If we're expanding for a post-inc user, make the post-inc adjustment.
    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
    ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);

    if (LU.Kind == LSRUse::ICmpZero) {
      // Expand ScaleReg as if it was part of the base regs.
      if (F.Scale == 1)
        Ops.push_back(
            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
      else {
        // An interesting way of "folding" with an icmp is to use a negated
        // scale, which we'll implement by inserting it into the other operand
        // of the icmp.
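        // Illustrative example: a formula "%n + -1*%i == 0" can be emitted
        // as "icmp eq %n, %i", moving the negated part to the other side of
        // the comparison instead of materializing an explicit negation.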
        assert(F.Scale == -1 &&
               "The only scale supported by ICmpZero uses is -1!");
        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
      }
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.

      // Flush the operand list to suppress SCEVExpander hoisting address
      // modes, unless the addressing mode will not be folded anyway.
      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
          isAMCompletelyFolded(TTI, LU, F)) {
        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
        Ops.clear();
        Ops.push_back(SE.getUnknown(FullV));
      }
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
      if (F.Scale != 1)
        ScaledS =
            SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the GV portion.
  if (F.BaseGV) {
    // Flush the operand list to suppress SCEVExpander hoisting.
    if (!Ops.empty()) {
      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy);
      Ops.clear();
      Ops.push_back(SE.getUnknown(FullV));
    }
    Ops.push_back(SE.getUnknown(F.BaseGV));
  }

  // Flush the operand list to suppress SCEVExpander hoisting of both folded
  // and unfolded offsets. LSR assumes they both live next to their uses.
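  // (Expanding the partial sum to a concrete Value here pins it at the
  // current insert point; left in SCEV form, the expander could hoist the
  // offset computation away from the use and defeat addressing-mode
  // folding.)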
  if (!Ops.empty()) {
    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
    Ops.clear();
    Ops.push_back(SE.getUnknown(FullV));
  }

  // FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
  // out at this point, or should we generate a SCEV adding together mixed
  // offsets?
  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
         "Expanding mismatched offsets\n");
  // Expand the immediate portion.
  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
  if (Offset.isNonZero()) {
    if (LU.Kind == LSRUse::ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
      // negated immediate.
      if (!ICmpScaledV)
        ICmpScaledV =
            ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
      }
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
      Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
    }
  }

  // Expand the unfolded offset portion.
  Immediate UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset.isNonZero()) {
    // Just add the immediate values.
    Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
  }

  // Emit instructions summing all the operands.
  const SCEV *FullS = Ops.empty() ?
                      SE.getConstant(IntTy, 0) :
                      SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);

  // We're done expanding now, so reset the rewriter.
  Rewriter.clearPostInc();

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
  // form, update the ICmp's other operand.
  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
      DeadInsts.emplace_back(OperandIsInstr);
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast = CastInst::Create(
            CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
            ICmpScaledV, OpTy, "tmp", CI->getIterator());
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      // A scale of 1 means that the scale has been expanded as part of the
      // base regs.
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)Offset.getFixedValue());
      if (C->getType() != OpTy) {
        C = ConstantFoldCastOperand(
            CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
            CI->getDataLayout());
        assert(C && "Cast of ConstantInt should have folded");
      }

      CI->setOperand(1, C);
    }
  }

  return FullV;
}

/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
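/// For example (illustrative): a PHI merging the value from two
/// predecessors may need the expression expanded once per incoming edge,
/// with the results cached per predecessor block so identical edges share
/// one expansion.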
void LSRInstance::RewriteForPHI(
    PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  DenseMap<BasicBlock *, Value *> Inserted;

  // Inserting instructions in the loop and using them as a PHI's input could
  // break LCSSA if the PHI's parent block is not a loop exit (i.e. the
  // corresponding incoming block is not loop exiting). So collect all such
  // instructions to form LCSSA for them later.
  SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
      bool needUpdateFixups = false;
      BasicBlock *BB = PN->getIncomingBlock(i);

      // If this is a critical edge, split the edge so that we do not insert
      // the code on all predecessor/successor paths. We do this unless this
      // is the canonical backedge for this loop, which complicates post-inc
      // users.
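      // Illustrative example: if BB ends in a conditional branch to both
      // PN's block and another block, expanding at the end of BB would also
      // execute the new instructions on the other path; splitting the edge
      // gives the expansion a dedicated block.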
      if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
          !isa<IndirectBrInst>(BB->getTerminator()) &&
          !isa<CatchSwitchInst>(BB->getTerminator())) {
        BasicBlock *Parent = PN->getParent();
        Loop *PNLoop = LI.getLoopFor(Parent);
        if (!PNLoop || Parent != PNLoop->getHeader()) {
          // Split the critical edge.
          BasicBlock *NewBB = nullptr;
          if (!Parent->isLandingPad()) {
            NewBB =
                SplitCriticalEdge(BB, Parent,
                                  CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
                                      .setMergeIdenticalEdges()
                                      .setKeepOneInputPHIs());
          } else {
            SmallVector<BasicBlock*, 2> NewBBs;
            DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
            SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI);
            NewBB = NewBBs[0];
          }
          // If NewBB==NULL, then SplitCriticalEdge refused to split because all
          // phi predecessors are identical. The simple thing to do is skip
          // splitting in this case rather than complicate the API.
          if (NewBB) {
            // If PN is outside of the loop and BB is in the loop, we want to
            // move the block to be immediately before the PHI block, not
            // immediately after BB.
            if (L->contains(BB) && !L->contains(PN))
              NewBB->moveBefore(PN->getParent());

            // Splitting the edge can reduce the number of PHI entries we have.
            e = PN->getNumIncomingValues();
            BB = NewBB;
            i = PN->getBasicBlockIndex(BB);

            needUpdateFixups = true;
          }
        }
      }

      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
      if (!Pair.second)
        PN->setIncomingValue(i, Pair.first->second);
      else {
        Value *FullV =
            Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);

        // If this is reuse-by-noop-cast, insert the noop cast.
        Type *OpTy = LF.OperandValToReplace->getType();
        if (FullV->getType() != OpTy)
          FullV = CastInst::Create(
              CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
              LF.OperandValToReplace->getType(), "tmp",
              BB->getTerminator()->getIterator());

        // If the incoming block for this value is not in the loop, it means the
        // current PHI is not in a loop exit, so we must create a LCSSA PHI for
        // the inserted value.
        if (auto *I = dyn_cast<Instruction>(FullV))
          if (L->contains(I) && !L->contains(BB))
            InsertedNonLCSSAInsts.push_back(I);

        PN->setIncomingValue(i, FullV);
        Pair.first->second = FullV;
      }

      // If LSR splits a critical edge and the phi node has other pending
      // fixup operands, we need to update those pending fixups. Otherwise
      // formulae will not be implemented completely and some instructions
      // will not be eliminated.
      if (needUpdateFixups) {
        for (LSRUse &LU : Uses)
          for (LSRFixup &Fixup : LU.Fixups)
            // If a fixup is supposed to rewrite some operand in the phi
            // that was just updated, it may already have been moved to
            // another phi node. Such a fixup requires an update.
            if (Fixup.UserInst == PN) {
              // Check if the operand we try to replace still exists in the
              // original phi.
              bool foundInOriginalPHI = false;
              for (const auto &val : PN->incoming_values())
                if (val == Fixup.OperandValToReplace) {
                  foundInOriginalPHI = true;
                  break;
                }

              // If fixup operand found in original PHI - nothing to do.
              if (foundInOriginalPHI)
                continue;

              // Otherwise it might be moved to another PHI and requires update.
              // If fixup operand not found in any of the incoming blocks that
              // means we have already rewritten it - nothing to do.
              for (const auto &Block : PN->blocks())
                for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
                     ++I) {
                  PHINode *NewPN = cast<PHINode>(I);
                  for (const auto &val : NewPN->incoming_values())
                    if (val == Fixup.OperandValToReplace)
                      Fixup.UserInst = NewPN;
                }
            }
      }
    }

  formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
}

/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding"), and update the UserInst to reference the
/// newly expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F,
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
  } else {
    Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                           FullV, OpTy, "tmp", LF.UserInst->getIterator());
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here (for now) because
    // Expand may have updated one of the operands of the icmp already, and
    // its new value may happen to be equal to LF.OperandValToReplace, in
    // which case doing replaceUsesOfWith leads to replacing both operands
    // with the same value. TODO: Reorganize this.
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
    DeadInsts.emplace_back(OperandIsInstr);
}

// Try to hoist the IVInc to the loop header if all IVInc users are in
// the loop header. This helps the backend generate post-indexed loads and
// stores when the latch block is different from the loop header block.
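// Illustrative example (assuming a target with post-indexed addressing,
// such as AArch64): placing the increment next to a header load lets
// instruction selection fold the pair into a single post-indexed access
// like "ldr x0, [x1], #8" rather than a separate load and add.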
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
                          const LSRUse &LU, Instruction *IVIncInsertPos,
                          Loop *L) {
  if (LU.Kind != LSRUse::Address)
    return false;

  // For now this code performs the conservative optimization and only works
  // for the header block. Later we can hoist the IVInc to a block that
  // post-dominates all users.
  BasicBlock *LHeader = L->getHeader();
  if (IVIncInsertPos->getParent() == LHeader)
    return false;

  if (!Fixup.OperandValToReplace ||
      any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
        Instruction *UI = cast<Instruction>(U);
        return UI->getParent() != LHeader;
      }))
    return false;

  Instruction *I = Fixup.UserInst;
  Type *Ty = I->getType();
  return Ty->isIntegerTy() &&
         ((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
          (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
}

/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Expand the new value definitions and update the users.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Instruction *InsertPos =
          canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
              ? L->getHeader()->getTerminator()
              : IVIncInsertPos;
      Rewriter.setIVIncInsertPos(L, InsertPos);
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
      Changed = true;
    }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    Changed = true;
  }

  for (const WeakVH &IV : Rewriter.getInsertedIVs())
    if (IV && dyn_cast<Instruction>(&*IV)->getParent())
      ScalarEvolutionIVs.push_back(IV);

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
                                                                  &TLI, MSSAU);

  // In our cost analysis above, we assume that each addrec consumes exactly
  // one register, and arrange to have increments inserted just before the
  // latch to maximize the chance this is true. However, if we reused
  // existing IVs, we now need to move the increments to match our
  // expectations. Otherwise, our cost modeling results in us having chosen
  // a non-optimal result for the actual schedule. (And yes, this scheduling
  // decision does impact later codegen.)
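  // For example (illustrative): if a reused IV's increment sits in the
  // header while the latch is a different block, moving the increment down
  // to IVIncInsertPos restores the one-register-per-addrec layout the cost
  // model assumed.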
  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    Value *Start = nullptr, *Step = nullptr;
    if (!matchSimpleRecurrence(&PN, BO, Start, Step))
      continue;

    switch (BO->getOpcode()) {
    case Instruction::Sub:
      if (BO->getOperand(0) != &PN)
        // sub is non-commutative - match handling elsewhere in LSR
        continue;
      break;
    case Instruction::Add:
      break;
    default:
      continue;
    }

    if (!isa<Constant>(Step))
      // If not a constant step, might increase register pressure
      // (We assume constants have been canonicalized to RHS)
      continue;

    if (BO->getParent() == IVIncInsertPos->getParent())
      // Only bother moving across blocks. Isel can handle block local case.
      continue;

    // Can we legally schedule inc at the desired point?
    if (!llvm::all_of(BO->uses(),
                      [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
      continue;
    BO->moveBefore(IVIncInsertPos);
    Changed = true;
  }
}

LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
      MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
                            ? PreferredAddresingMode
                            : TTI.getPreferredAddressingMode(L, &SE)),
      Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false),
      BaselineCost(L, SE, TTI, AMK) {
  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return;

  // If there's no interesting work to be done, bail early.
  if (IU.empty()) return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
    // no good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        for (BasicBlock *PredBB : PN->blocks())
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
            return;
    }
  }

  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
             dbgs() << ":\n");

  // Configure SCEVExpander already now, so the correct mode is used for
  // isSafeToExpand() checks.
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty()) return;

  // Skip nested loops until we can model them better with formulae.
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  // If the number of registers is not the major cost, we cannot benefit from
  // the current profitable chain optimization, which is based on the number
  // of registers.
  // FIXME: add profitable chain optimization for other kinds of major cost,
  // for example the number of instructions.
  if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
    CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  if (Uses.empty())
    return;

  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
             print_uses(dbgs()));
  LLVM_DEBUG(dbgs() << "The baseline solution requires ";
             BaselineCost.print(dbgs()); dbgs() << "\n");

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) && "Illegal formula generated!");
  }
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      dbgs() << " ";
      LF.print(OS);
      OS << '\n';
    }
}

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    dbgs() << " ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << " ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}

LLVM_DUMP_METHOD void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}
#endif
63220b57cec5SDimitry Andric
63230b57cec5SDimitry Andric namespace {
63240b57cec5SDimitry Andric
63250b57cec5SDimitry Andric class LoopStrengthReduce : public LoopPass {
63260b57cec5SDimitry Andric public:
63270b57cec5SDimitry Andric static char ID; // Pass ID, replacement for typeid
63280b57cec5SDimitry Andric
63290b57cec5SDimitry Andric LoopStrengthReduce();
63300b57cec5SDimitry Andric
63310b57cec5SDimitry Andric private:
63320b57cec5SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override;
63330b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override;
63340b57cec5SDimitry Andric };
63350b57cec5SDimitry Andric
63360b57cec5SDimitry Andric } // end anonymous namespace
63370b57cec5SDimitry Andric
LoopStrengthReduce()63380b57cec5SDimitry Andric LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
63390b57cec5SDimitry Andric initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
63400b57cec5SDimitry Andric }
63410b57cec5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const63420b57cec5SDimitry Andric void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
63430b57cec5SDimitry Andric // We split critical edges, so we change the CFG. However, we do update
63440b57cec5SDimitry Andric // many analyses if they are around.
63450b57cec5SDimitry Andric AU.addPreservedID(LoopSimplifyID);
63460b57cec5SDimitry Andric
63470b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>();
63480b57cec5SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>();
63490b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID);
63500b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>();
63510b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
63520b57cec5SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>();
63530b57cec5SDimitry Andric AU.addPreserved<ScalarEvolutionWrapperPass>();
63540b57cec5SDimitry Andric AU.addRequired<AssumptionCacheTracker>();
63550b57cec5SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>();
63560b57cec5SDimitry Andric // Requiring LoopSimplify a second time here prevents IVUsers from running
63570b57cec5SDimitry Andric // twice, since LoopSimplify was invalidated by running ScalarEvolution.
63580b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID);
63590b57cec5SDimitry Andric AU.addRequired<IVUsersWrapperPass>();
63600b57cec5SDimitry Andric AU.addPreserved<IVUsersWrapperPass>();
63610b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
63625ffd83dbSDimitry Andric AU.addPreserved<MemorySSAWrapperPass>();
63630b57cec5SDimitry Andric }

namespace {

/// Enables more convenient iteration over a DWARF expression vector.
static iterator_range<llvm::DIExpression::expr_op_iterator>
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
  llvm::DIExpression::expr_op_iterator Begin =
      llvm::DIExpression::expr_op_iterator(Expr.begin());
  llvm::DIExpression::expr_op_iterator End =
      llvm::DIExpression::expr_op_iterator(Expr.end());
  return {Begin, End};
}

struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;
    Expr = Base.Expr;
  }

  void clear() {
    LocationOps.clear();
    Expr.clear();
  }

  /// The DIExpression as we translate the SCEV.
  SmallVector<uint64_t, 6> Expr;
  /// The location ops of the DIExpression.
  SmallVector<Value *, 2> LocationOps;

  void pushOperator(uint64_t Op) { Expr.push_back(Op); }
  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }

  /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the
  /// value in the set of values referenced by the expression.
  void pushLocation(llvm::Value *V) {
    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
    auto *It = llvm::find(LocationOps, V);
    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
    } else {
      ArgIndex = LocationOps.size();
      LocationOps.push_back(V);
    }
    Expr.push_back(ArgIndex);
  }
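
  // Illustrative example (not in the original source): pushing locations %a,
  // %b, then %a again appends
  //   DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_LLVM_arg 0
  // to Expr, with LocationOps == {%a, %b}; a repeated value reuses the index
  // it was first assigned.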

  void pushValue(const SCEVUnknown *U) {
    llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
    pushLocation(V);
  }

  bool pushConst(const SCEVConstant *C) {
    if (C->getAPInt().getSignificantBits() > 64)
      return false;
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());
    return true;
  }

  // Iterating the expression as DWARF ops is convenient when updating
  // DW_OP_LLVM_arg operands.
  iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
    return ToDwarfOpIter(Expr);
  }

  /// Several SCEV types are sequences of the same arithmetic operator applied
  /// to constants and values that may be extended or truncated.
  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
                          uint64_t DwarfOp) {
    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
           "Expected arithmetic SCEV type");
    bool Success = true;
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      Success &= pushSCEV(Op);

      if (EmitOperator >= 1)
        pushOperator(DwarfOp);
      ++EmitOperator;
    }
    return Success;
  }

  // TODO: Identify and omit noop casts.
  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    const llvm::SCEV *Inner = C->getOperand(0);
    const llvm::Type *Type = C->getType();
    uint64_t ToWidth = Type->getIntegerBitWidth();
    bool Success = pushSCEV(Inner);
    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
                          IsSigned ? llvm::dwarf::DW_ATE_signed
                                   : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)
      pushOperator(Op);
    return Success;
  }

  // TODO: MinMax - although these haven't been encountered in the test suite.
  bool pushSCEV(const llvm::SCEV *S) {
    bool Success = true;
    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
      Success &= pushConst(StartInt);

    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      if (!U->getValue())
        return false;
      pushLocation(U->getValue());

    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);

    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);

    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
      // Assert if a new and unknown SCEVCastExpr type is encountered.
      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
             "Unexpected cast type in SCEV.");
      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));

    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);

    } else if (isa<SCEVAddRecExpr>(S)) {
      // Nested SCEVAddRecExprs are generated by nested loops and are
      // currently unsupported.
      return false;

    } else {
      return false;
    }
    return Success;
  }

  /// Return true if the combination of arithmetic operator and underlying
  /// SCEV constant value is an identity function.
  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
      if (C->getAPInt().getSignificantBits() > 64)
        return false;
      int64_t I = C->getAPInt().getSExtValue();
      switch (Op) {
      case llvm::dwarf::DW_OP_plus:
      case llvm::dwarf::DW_OP_minus:
        return I == 0;
      case llvm::dwarf::DW_OP_mul:
      case llvm::dwarf::DW_OP_div:
        return I == 1;
      }
    }
    return false;
  }
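
  // For example, adding or subtracting the constant 0, or multiplying or
  // dividing by the constant 1, leaves the value on the DWARF expression
  // stack unchanged, so callers skip emitting those operations.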

  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
  /// builder's expression stack. The stack should already contain an
  /// expression for the iteration count, so that it can be multiplied by
  /// the stride and added to the start.
  /// Components of the expression are omitted if they are an identity
  /// function.
  /// Chain (non-affine) SCEVs are not supported.
  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    // TODO: Is this check needed?
    if (isa<SCEVAddRecExpr>(SAR.getStart()))
      return false;

    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_mul);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_plus);
    }
    return true;
  }
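
  // Illustrative sketch (assumed values, not from the original source): for a
  // location with the affine SCEV {4,+,8}<%loop> and the iteration count
  // already on the expression stack, this appends
  //   DW_OP_consts 8, DW_OP_mul,   // iteration count * stride
  //   DW_OP_consts 4, DW_OP_plus   // ... + start
  // so the stack now holds start + stride * iteration-count.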

  /// Create an expression that is an offset from a value (usually the IV).
  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
    DIExpression::appendOffset(Expr, Offset);
    LLVM_DEBUG(
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");
  }
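
  // For example (assumed values): with Offset == 16 this yields the fragment
  //   DW_OP_LLVM_arg 0, DW_OP_plus_uconst 16
  // over the single location OffsetValue; DIExpression::appendOffset emits a
  // subtraction instead for negative offsets.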

  /// Combine a translation of the SCEV and the IV to create an expression
  /// that recovers a location's value.
  /// Returns true if an expression was created.
  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    // SCEVs for SSA values are most frequently of the form
    // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
    // This is because %a is a PHI node that is not the IV. However, these
    // SCEVs have not been observed to result in debuginfo-lossy
    // optimisations, so it's not expected that this point will be reached.
    if (!isa<SCEVAddRecExpr>(S))
      return false;

    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
                      << '\n');

    const auto *Rec = cast<SCEVAddRecExpr>(S);
    if (!Rec->isAffine())
      return false;

    if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return false;

    // Initialise a new builder with the iteration count expression. In
    // combination with the value's SCEV this enables recovery.
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))
      return false;

    return true;
  }
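
  // Taken together with SCEVToIterCountExpr below, the recovered value for a
  // location with SCEV {s,+,k} is conceptually
  //   ((IV - IV.start) / IV.stride) * k + s
  // (an illustrative summary of the emitted DWARF, not a literal quote).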

  /// Convert the SCEV of the loop's induction variable into an expression
  /// for the iteration count, pushed onto the builder's expression stack:
  /// the IV's start is subtracted and the result is divided by the stride.
  /// Components of the expression are omitted if they are an identity
  /// function.
  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    if (isa<SCEVAddRecExpr>(SAR.getStart())) {
      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
                        << SAR << '\n');
      return false;
    }
    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_minus);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_div);
    }
    return true;
  }
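
  // Illustrative sketch (assumed values): for an IV with SCEV {4,+,8}<%loop>,
  // with DW_OP_LLVM_arg 0 (the IV itself) already on the stack, this appends
  //   DW_OP_consts 4, DW_OP_minus,  // IV - start
  //   DW_OP_consts 8, DW_OP_div     // ... / stride
  // leaving the current iteration count on the stack.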

  // Append the current expression and locations to a location list and an
  // expression list. Modify the DW_OP_LLVM_arg indexes to account for
  // the locations already present in the destination list.
  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
                       SmallVectorImpl<Value *> &DestLocations) {
    assert(!DestLocations.empty() &&
           "Expected the locations vector to contain the IV");
    // The DW_OP_LLVM_arg arguments of the expression being appended must be
    // modified to account for the locations already in the destination
    // vector. All builders contain the IV as the first location op.
    assert(!LocationOps.empty() &&
           "Expected the location ops to contain the IV.");
    // DestIndexMap[n] contains the index in DestLocations for the nth
    // location in this SCEVDbgValueBuilder.
    SmallVector<uint64_t, 2> DestIndexMap;
    for (const auto &Op : LocationOps) {
      auto It = find(DestLocations, Op);
      if (It != DestLocations.end()) {
        // Location already exists in DestLocations, reuse existing ArgIndex.
        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
        continue;
      }
      // Location is not in DestLocations, add it.
      DestIndexMap.push_back(DestLocations.size());
      DestLocations.push_back(Op);
    }

    for (const auto &Op : expr_ops()) {
      if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
        Op.appendToVector(DestExpr);
        continue;
      }

      DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
      // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
      // DestIndexMap[n] contains its new index in DestLocations.
      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
      DestExpr.push_back(NewIndex);
    }
  }
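
  // Illustrative example (not in the original source): if DestLocations ==
  // {%iv, %a} and this builder's LocationOps == {%iv, %b}, then DestIndexMap
  // == {0, 2}, %b is appended to DestLocations, and a `DW_OP_LLVM_arg 1` in
  // this expression is copied into DestExpr as `DW_OP_LLVM_arg 2`.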
};

/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
/// and DIExpression.
struct DVIRecoveryRec {
  DVIRecoveryRec(DbgValueInst *DbgValue)
      : DbgRef(DbgValue), Expr(DbgValue->getExpression()),
        HadLocationArgList(false) {}
  DVIRecoveryRec(DbgVariableRecord *DVR)
      : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}

  PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef;
  DIExpression *Expr;
  bool HadLocationArgList;
  SmallVector<WeakVH, 2> LocationOps;
  SmallVector<const llvm::SCEV *, 2> SCEVs;
  SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;

  void clear() {
    for (auto &RE : RecoveryExprs)
      RE.reset();
    RecoveryExprs.clear();
  }

  ~DVIRecoveryRec() { clear(); }
};
} // namespace

/// Returns the total number of DW_OP_LLVM_arg operands in the expression.
/// This helps in determining if a DIArglist is necessary or can be omitted
/// from the dbg.value.
static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
  auto expr_ops = ToDwarfOpIter(Expr);
  unsigned Count = 0;
  for (auto Op : expr_ops)
    if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
      Count++;
  return Count;
}
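
// For example, the expression
//   {DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_plus, DW_OP_stack_value}
// contains two DW_OP_LLVM_arg operands and therefore still needs a DIArglist.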

/// Overwrites DVI with the location and Ops as the DIExpression. This will
/// create an invalid expression if Ops has any dwarf::DW_OP_LLVM_arg
/// operands, because a DIArglist is not created for the first argument of
/// the dbg.value.
template <typename T>
static void updateDVIWithLocation(T &DbgVal, Value *Location,
                                  SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "
                                    "contain any DW_OP_llvm_arg operands.");
  DbgVal.setRawLocation(ValueAsMetadata::get(Location));
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

/// Overwrite DVI with locations placed into a DIArglist.
template <typename T>
static void updateDVIWithLocations(T &DbgVal,
                                   SmallVectorImpl<Value *> &Locations,
                                   SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) != 0 &&
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  SmallVector<ValueAsMetadata *, 3> MetadataLocs;
  for (Value *V : Locations)
    MetadataLocs.push_back(ValueAsMetadata::get(V));
  auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
  DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
}

/// Write the new expression and new location ops for the dbg.value. If
/// possible, reduce the size of the dbg.value intrinsic by omitting the
/// DIArglist. The DIArglist can be omitted if:
/// 1. There is only a single location, referenced by a single
///    DW_OP_LLVM_arg.
/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
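/// Illustrative example (assumed operands): the expression
///   {DW_OP_LLVM_arg 0, DW_OP_plus_uconst 8, DW_OP_stack_value}
/// meets both conditions, so it is rewritten as a bare location with
///   {DW_OP_plus_uconst 8, DW_OP_stack_value}
/// and no DIArglist wrapper.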
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
                               SmallVectorImpl<Value *> &NewLocationOps,
                               SmallVectorImpl<uint64_t> &NewExpr) {
  auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
    unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
    if (NumLLVMArgs == 0) {
      // Location assumed to be on the stack.
      updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
    } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
      // There is only a single DW_OP_llvm_arg at the start of the expression,
      // so it can be omitted along with DIArglist.
      assert(NewExpr[1] == 0 &&
             "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
      llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
      updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
    } else {
      // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
      updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
    }

    // If the DIExpression was previously empty then add the stack terminator.
    // Non-empty expressions have only had elements inserted into them and so
    // the terminator should already be present e.g. stack_value or fragment.
    DIExpression *SalvageExpr = DbgVal->getExpression();
    if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
      SalvageExpr =
          DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
      DbgVal->setExpression(SalvageExpr);
    }
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}

/// Cached location ops may be erased during LSR, in which case a poison
/// value is required when restoring from the cache. The type of that
/// location is no longer available, so just use int8. The poison will be
/// replaced by one or more locations later when a SCEVDbgValueBuilder
/// selects alternative locations to use for the salvage.
static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
  return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C));
}

/// Restore the DVI's pre-LSR arguments. Substitute poison for any erased
/// values.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
  auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
    LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                      << "scev-salvage: post-LSR: " << *DbgVal << '\n');
    assert(DVIRec.Expr && "Expected an expression");
    DbgVal->setExpression(DVIRec.Expr);

    // Even a single location-op may be inside a DIArgList and referenced with
    // DW_OP_LLVM_arg, which is valid only with a DIArgList.
    if (!DVIRec.HadLocationArgList) {
      assert(DVIRec.LocationOps.size() == 1 &&
             "Unexpected number of location ops.");
      // LSR's unsuccessful salvage attempt may have added DIArgList, which in
      // this case was not present before, so force the location back to a
      // single uncontained Value.
      Value *CachedValue =
          getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
      DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
    } else {
      SmallVector<ValueAsMetadata *, 3> MetadataLocs;
      for (WeakVH VH : DVIRec.LocationOps) {
        Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
        MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
      }
      auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
      DbgVal->setRawLocation(
          llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));
    }
    LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}

static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {

  if (isa<DbgValueInst *>(DVIRec.DbgRef)
          ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
          : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
    return false;

  // LSR may have caused several changes to the dbg.value in the failed
  // salvage attempt. So restore the DIExpression, the location ops and also
  // the location ops format, which is always DIArglist for multiple ops, but
  // only sometimes for a single op.
  restorePreTransformState(DVIRec);

  // LocationOpIndexMap[i] will store the post-LSR location index of
  // the non-optimised out location at pre-LSR index i.
  SmallVector<int64_t, 2> LocationOpIndexMap;
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  SmallVector<Value *, 2> NewLocationOps;
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // Place the locations not optimised out in the list first, avoiding
    // inserts later. The map is used to update the DIExpression's
    // DW_OP_LLVM_arg arguments as the expression is updated.
    if (VH && !isa<UndefValue>(VH)) {
      NewLocationOps.push_back(VH);
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      continue;
    }

    // It's possible that a value referred to in the SCEV may have been
    // optimised out by LSR.
    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
        SE.containsUndefs(DVIRec.SCEVs[i])) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or "
                           "erased. Salvage abandoned.\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    // Create an offset-based salvage expression if possible, as it requires
    // fewer DWARF ops than an iteration count-based expression.
    if (std::optional<APInt> Offset =
            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
                                                 SE))
      return false;
  }

  // Merge the DbgValueBuilder generated expressions and the original
  // DIExpression, placing the result into a new vector.
  SmallVector<uint64_t, 3> NewExpr;
  if (DVIRec.Expr->getNumElements() == 0) {
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           "DIExpression.");
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
  }
  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    // Most Ops needn't be updated.
    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
      Op.appendToVector(NewExpr);
      continue;
    }

    uint64_t LocationArgIndex = Op.getArg(0);
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
    // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
    // optimise it away. Just translate the argument to the updated
    // location index.
    if (!DbgBuilder) {
      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a non-negative index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      continue;
    }
    // The location has a recovery expression.
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  }

  UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
  else
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
  return true;
}

/// Obtain an expression for the iteration count, then attempt to salvage the
/// dbg.value intrinsics.
static void DbgRewriteSalvageableDVIs(
    llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())
    return;

  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

  if (const SCEVAddRecExpr *IVAddRec =
          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
    if (!IVAddRec->isAffine())
      return;

    // Prevent translation using excessive resources.
    if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return;

    // The iteration count is required to recover location values.
    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
      return;

    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
                      << '\n');

    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
                 IterCountExpr);
    }
  }
}

/// Identify and cache salvageable DVI locations and expressions along with
/// the corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and salvaging.
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      auto ProcessDbgValue = [&](auto *DbgVal) -> bool {
        // Ensure that if any location op is undef the dbg.value is not
        // cached.
        if (DbgVal->isKillLocation())
          return false;

        // Check that the location op SCEVs are suitable for translation to
        // DIExpression.
        const auto &HasTranslatableLocationOps =
            [&](const auto *DbgValToTranslate) -> bool {
          for (const auto LocOp : DbgValToTranslate->location_ops()) {
            if (!LocOp)
              return false;

            if (!SE.isSCEVable(LocOp->getType()))
              return false;

            const SCEV *S = SE.getSCEV(LocOp);
            if (SE.containsUndefs(S))
              return false;
          }
          return true;
        };

        if (!HasTranslatableLocationOps(DbgVal))
          return false;

        std::unique_ptr<DVIRecoveryRec> NewRec =
            std::make_unique<DVIRecoveryRec>(DbgVal);
        // Each location Op may need a SCEVDbgValueBuilder in order to recover
        // it. Pre-allocating a vector will enable quick lookups of the
        // builder later during the salvage.
        NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
        for (const auto LocOp : DbgVal->location_ops()) {
          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
          NewRec->LocationOps.push_back(LocOp);
          NewRec->HadLocationArgList = DbgVal->hasArgList();
        }
        SalvageableDVISCEVs.push_back(std::move(NewRec));
        return true;
      };
      for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
        if (DVR.isDbgValue() || DVR.isDbgAssign())
          ProcessDbgValue(&DVR);
      }
      auto DVI = dyn_cast<DbgValueInst>(&I);
      if (!DVI)
        continue;
      if (ProcessDbgValue(DVI))
        DVIHandles.insert(DVI);
    }
  }
}

/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
/// any PHI from the loop header is usable, but may have less chance of
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
                                           const LSRInstance &LSR) {

  auto IsSuitableIV = [&](PHINode *P) {
    if (!SE.isSCEVable(P->getType()))
      return false;
    if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
      return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
    return false;
  };

  // For now, just pick the first IV that was generated and inserted by
  // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
  // by subsequent transforms.
  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (!IV)
      continue;

    // There should only be PHI node IVs.
    PHINode *P = cast<PHINode>(&*IV);

    if (IsSuitableIV(P))
      return P;
  }

  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
      return &P;
  }
  return nullptr;
}

static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                      const LoopInfo &LI, const TargetTransformInfo &TTI) {
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
    return std::nullopt;
  }
  // Only inspect loops with a simple structure.
  if (!L->isLoopSimplifyForm()) {
    LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
    return std::nullopt;
  }

  if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
    LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
    return std::nullopt;
  }

  BasicBlock *LoopLatch = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
  if (!BI || BI->isUnconditional())
    return std::nullopt;
  auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!TermCond) {
    LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
                         "ICmpInst\n");
    return std::nullopt;
  }
  if (!TermCond->hasOneUse()) {
    LLVM_DEBUG(
        dbgs()
        << "Cannot replace terminating condition with more than one use\n");
    return std::nullopt;
  }

  BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
  Value *RHS = TermCond->getOperand(1);
  if (!LHS || !L->isLoopInvariant(RHS))
    // We could pattern match the inverse form of the icmp, but that is
    // non-canonical, and this pass is running *very* late in the pipeline.
    return std::nullopt;

  // Find the IV used by the current exit condition.
  PHINode *ToFold;
  Value *ToFoldStart, *ToFoldStep;
  if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
    return std::nullopt;

  // Ensure the simple recurrence is a part of the current loop.
  if (ToFold->getParent() != L->getHeader())
    return std::nullopt;

  // If that IV isn't dead after we rewrite the exit condition in terms of
  // another IV, there's no point in doing the transform.
  if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
    return std::nullopt;

  // Inserting instructions in the preheader has a runtime cost, so scale
  // the allowed cost with the loop's trip count as best we can.
  const unsigned ExpansionBudget = [&]() {
    unsigned Budget = 2 * SCEVCheapExpansionBudget;
    if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
      return std::min(Budget, SmallTC);
    if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
      return std::min(Budget, *SmallTC);
    // Unknown trip count, assume long running by default.
    return Budget;
  }();
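
  // E.g. with SCEVCheapExpansionBudget == 4 (illustrative numbers, not a
  // quote of the actual default), a loop with a known max trip count of 3
  // gets a budget of 3, while an unknown trip count keeps the full 8.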

  const SCEV *BECount = SE.getBackedgeTakenCount(L);
  const DataLayout &DL = L->getHeader()->getDataLayout();
  SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");

  PHINode *ToHelpFold = nullptr;
  const SCEV *TermValueS = nullptr;
  bool MustDropPoison = false;
  auto InsertPt = L->getLoopPreheader()->getTerminator();
  for (PHINode &PN : L->getHeader()->phis()) {
    if (ToFold == &PN)
      continue;

    if (!SE.isSCEVable(PN.getType())) {
      LLVM_DEBUG(dbgs() << "IV of phi '" << PN
                        << "' is not SCEV-able, not qualified for the "
                           "terminating condition folding.\n");
      continue;
    }
    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
    // Only speculate on affine AddRecs.
    if (!AddRec || !AddRec->isAffine()) {
      LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
                        << "' is not an affine add recurrence, not qualified "
                           "for the terminating condition folding.\n");
      continue;
    }

    // Check that we can compute the value of AddRec on the exiting iteration
    // without soundness problems. evaluateAtIteration internally needs
    // to multiply the stride by the iteration number - which may wrap around.
    // The issue here is subtle because computing the result accounting for
    // wrap is insufficient. In order to use the result in an exit test, we
    // must also know that AddRec doesn't take the same value on any previous
    // iteration. The simplest case to consider is a candidate IV which is
    // narrower than the trip count (and thus the original IV), but this can
    // also happen due to non-unit strides on the candidate IVs.
    if (!AddRec->hasNoSelfWrap() ||
        !SE.isKnownNonZero(AddRec->getStepRecurrence(SE)))
      continue;

    const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
    const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
    if (!Expander.isSafeToExpand(TermValueSLocal)) {
      LLVM_DEBUG(
          dbgs() << "Is not safe to expand terminating value for phi node"
                 << PN << "\n");
      continue;
    }

    if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
                                     &TTI, InsertPt)) {
      LLVM_DEBUG(
          dbgs() << "Is too expensive to expand terminating value for phi node"
                 << PN << "\n");
      continue;
    }

    // The candidate IV may have been otherwise dead and poison from the
    // very first iteration. If we can't disprove that, we can't use the IV.
    if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
      LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV " << PN
                        << "\n");
      continue;
    }

    // The candidate IV may become poison on the last iteration. If this
    // value is not branched on, this is a well defined program. We're
    // about to add a new use to this IV, and we have to ensure we don't
    // insert UB which didn't previously exist.
    bool MustDropPoisonLocal = false;
    Instruction *PostIncV =
        cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
    if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
                                       &DT)) {
      LLVM_DEBUG(dbgs() << "Cannot prove poison safety to insert use " << PN
                        << "\n");

      // If this is a complex recurrence with multiple instructions computing
      // the backedge value, we might need to strip poison flags from all of
      // them.
      if (PostIncV->getOperand(0) != &PN)
        continue;

      // In order to perform the transform, we need to drop the poison
      // generating flags on this instruction (if any).
      MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
    }
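    // Illustration (hypothetical): with
    //   %cand.next = add nuw nsw i64 %cand, 1
    // the increment may yield poison exactly on the final iteration, which is
    // harmless while nothing branches on it. Once the new exit compare uses
    // %cand.next, the nuw/nsw flags must be dropped (MustDropPoisonLocal),
    // leaving a plain add whose wrapping result is well defined.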

    // We pick the last legal alternate IV. We could explore choosing an
    // optimal alternate IV if we had a decent heuristic to do so.
    ToHelpFold = &PN;
    TermValueS = TermValueSLocal;
    MustDropPoison = MustDropPoisonLocal;
  }

  LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
                 << "Cannot find other AddRec IV to help folding\n";);

  LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
             << "\nFound loop that can fold terminating condition\n"
             << "  BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
             << "  TermCond: " << *TermCond << "\n"
             << "  BranchInst: " << *BI << "\n"
             << "  ToFold: " << *ToFold << "\n"
             << "  ToHelpFold: " << *ToHelpFold << "\n");

  if (!ToFold || !ToHelpFold)
    return std::nullopt;
  return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
}

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {

  // Debug preservation - before we start removing anything, identify which
  // DVIs meet the salvageable criteria and store their DIExpression and
  // SCEVs.
  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);

  bool Changed = false;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR transformation.
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

  // Remove any extra phis created by processing inner loops.
  Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", false);
#ifndef NDEBUG
    Rewriter.setDebugType(DEBUG_TYPE);
#endif
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    Rewriter.clear();
    if (numFolded) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }
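  // replaceCongruentIVs folds induction variables with identical SCEVs into
  // one. A minimal sketch of what it cleans up (hypothetical IR):
  //   %i = phi i64 [ 0, %ph ], [ %i.next, %latch ]   ; {0,+,1}
  //   %j = phi i64 [ 0, %ph ], [ %j.next, %latch ]   ; also {0,+,1}
  // Uses of %j are redirected to %i, leaving %j trivially dead for the
  // deletion calls above.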
  // LSR may at times remove all uses of an induction variable from a loop.
  // The only remaining use is the PHI in the exit block.
  // When this is the case, if the exit value of the IV can be calculated using
  // SCEV, we can replace the exit block PHI with the final value of the IV and
  // skip the updates in each loop iteration.
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", true);
    int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
                                         UnusedIndVarInLoop, DeadInsts);
    Rewriter.clear();
    if (Rewrites) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }
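  // Illustration (hypothetical): if the only surviving use of an IV is an
  // exit-block PHI such as
  //   exit: %iv.lcssa = phi i64 [ %iv, %latch ]
  // and SCEV can compute the IV's final value, the PHI is replaced with that
  // value materialized outside the loop, making the in-loop updates dead.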

  const bool EnableFormTerm = [&] {
    switch (AllowTerminatingConditionFoldingAfterLSR) {
    case cl::BOU_TRUE:
      return true;
    case cl::BOU_FALSE:
      return false;
    case cl::BOU_UNSET:
      return TTI.shouldFoldTerminatingConditionAfterLSR();
    }
    llvm_unreachable("Unhandled cl::boolOrDefault enum");
  }();

  if (EnableFormTerm) {
    if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) {
      auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;

      Changed = true;
      NumTermFold++;

      BasicBlock *LoopPreheader = L->getLoopPreheader();
      BasicBlock *LoopLatch = L->getLoopLatch();

      (void)ToFold;
      LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
                        << *ToFold << "\n"
                        << "New term-cond phi-node:\n"
                        << *ToHelpFold << "\n");
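      // A minimal before/after sketch of the rewrite (hypothetical IR):
      //   before: %c = icmp slt i64 %i.next, %n      ; tests the old IV
      //   after:  %end = <TermValueS expanded in the preheader>
      //           %c = icmp eq ptr %p.next, %end     ; reuses the alternate IV
      // The old counting IV may then become dead and is cleaned up below.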

      Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
      (void)StartValue;
      Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);

      // See comment in canFoldTermCondOfLoop on why this is sufficient.
      if (MustDrop)
        cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();

      // SCEVExpander for uses in both the preheader and the latch.
      const DataLayout &DL = L->getHeader()->getDataLayout();
      SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");

      assert(Expander.isSafeToExpand(TermValueS) &&
             "Terminating value was checked safe in canFoldTermCondOfLoop");

      // Create the new terminating value in the loop preheader.
      Value *TermValue = Expander.expandCodeFor(
          TermValueS, ToHelpFold->getType(), LoopPreheader->getTerminator());

      LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
                        << *StartValue << "\n"
                        << "Terminating value of new term-cond phi-node:\n"
                        << *TermValue << "\n");

      // Create the new terminating condition at the loop latch.
      BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
      ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
      IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
      Value *NewTermCond =
          LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
                                  "lsr_fold_term_cond.replaced_term_cond");
      // Swap successors to exit the loop body if the IV equals the new
      // TermValue.
      if (BI->getSuccessor(0) == L->getHeader())
        BI->swapSuccessors();

      LLVM_DEBUG(dbgs() << "Old term-cond:\n"
                        << *OldTermCond << "\n"
                        << "New term-cond:\n" << *NewTermCond << "\n");

      BI->setCondition(NewTermCond);

      Expander.clear();
      OldTermCond->eraseFromParent();
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
  // expressions composed using the derived iteration count.
  // TODO: Allow for multiple IV references for nested AddRecSCEVs
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}

PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {
  if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
                          AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }