//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
// gep %a, 0, %x, %y; load
// gep %a, 0, %x, %y + 1; load
// gep %a, 0, %x + 1, %y; load
// gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
// significant slowdown in targets with limited addressing modes. For instance,
// because the PTX target does not support the reg+reg addressing mode, the
// NVPTX backend emits PTX code that literally computes the pointer address of
// each GEP, wasting tons of registers. It emits the following PTX for the
// first load and similar PTX for other loads.
//
// mov.u32         %r1, %x;
// mov.u32         %r2, %y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];
//
// To reduce the register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
// base = gep a, 0, x, y
// load base
// load base + 1  * sizeof(float)
// load base + 32 * sizeof(float)
// load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetic into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetic into the
// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
//
// mov.u32         %r1, %tid.x;
// mov.u32         %r2, %tid.y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];     // so far the same as unoptimized PTX
// ld.global.f32   %f2, [%rl6+4];   // much better
// ld.global.f32   %f3, [%rl6+128]; // much better
// ld.global.f32   %f4, [%rl6+132]; // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
// multiple indices to either multiple GEPs with a single index or arithmetic
// operations (depending on whether the target uses alias analysis in codegen).
// Such a transformation can have the following benefits:
// (1) It can always extract constants in the indices of structure types.
// (2) After such lowering, there are more optimization opportunities such as
//     CSE, LICM and CGP.
//
// E.g. The following GEPs have multiple indices:
//  BB1:
//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
//    load %p
//    ...
//  BB2:
//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
//    load %p2
//    ...
//
// We cannot apply CSE to the common part related to index "i64 %i" while the
// GEPs are in this form. Lowering the GEPs makes that possible.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
//  BB1:
//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = add i64 %1, %2                          ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = add i64 %3, %4
//    %6 = add i64 %5, struct_field_3              ; Constant offset
//    %p = inttoptr i64 %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = add i64 %7, %8                          ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = add i64 %9, %10
//    %12 = add i64 %11, struct_field_2            ; Constant offset
//    %p2 = inttoptr i64 %12 to i32*
//    load %p2
//    ...
//
// If the target uses alias analysis in codegen, this pass will lower a GEP
// with multiple indices into multiple GEPs with a single index:
//  BB1:
//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = getelementptr i8* %3, i64 %4
//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
//    %p = bitcast i8* %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = getelementptr i8* %9, i64 %10
//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
//    %p2 = bitcast i8* %12 to i32*
//    load %p2
//    ...
//
// Lowering GEPs can also benefit other passes such as LICM and CGP.
// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
// indices if any of the indices is loop-variant. If we lower such a GEP into
// invariant parts and variant parts, LICM can hoist/sink those invariant parts.
// CGP (CodeGen Prepare) tries to sink address calculations that match the
// target's addressing modes. A GEP with multiple indices may not match and will
// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
// them, so we end up with a better addressing mode.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code.
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);

namespace {

/// A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple addition
/// of something and a constant integer which can be trivially split. For
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index into (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5); however, in this
/// case, -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx The given GEP index
  /// \p GEP The given GEP
  /// \p UserChainTail Outputs the tail of UserChain so that we can
  ///                  garbage-collect unused instructions in UserChain.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail);

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// none was found). The meaning of the arguments is the same as in Extract.
  static int64_t Find(Value *Idx, GetElementPtrInst *GEP);

private:
  ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
      : IP(InsertionPt), DL(InsertionPt->getDataLayout()) {}

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search is successful,
  /// returns C and updates UserChain as a def-use chain from C to V;
  /// otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression
  ///    tree that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  ///   5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  ///   zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///     zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an in-bounds
  ///                 array index.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;

  /// Insertion position of cloned instructions.
  BasicBlock::iterator IP;

  const DataLayout &DL;
};

/// A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
public:
  static char ID;

  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
      : FunctionPass(ID), LowerGEP(LowerGEP) {
    initializeSeparateConstOffsetFromGEPLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  bool LowerGEP;
};

/// The pass implementation: tries to split every GEP in the function into a
/// variadic base and a constant offset. It operates on a whole function
/// because searching for the constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP {
public:
  SeparateConstOffsetFromGEP(
      DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,
      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
      : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}

  bool run(Function &F);

private:
  /// Track the operands of an add or sub.
  using ExprKey = std::pair<Value *, Value *>;

  /// Create a pair for use as a map key for a commutable operation.
  static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {
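    // Order the two operands by pointer value so that (A, B) and (B, A)
    // produce the same key.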
    if (A < B)
      return {A, B};
    return {B, A};
  }

  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Tries to reorder the given GEP with the GEP that produces the base if
  /// doing so results in producing a constant offset as the outermost
  /// index.
  bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              int64_t AccumulativeByteOffset);

  /// Lower a GEP with multiple indices into ptrtoint+arithmetic+inttoptr form.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of arithmetic operations and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToArithmetics(GetElementPtrInst *Variadic,
                          int64_t AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it finds in indices of both sequential and structure
  /// types, otherwise it only finds in sequential indices. The output
  /// NeedsExtraction indicates whether we successfully find a non-zero constant
  /// offset.
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
  bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset. After extraction, it becomes desirable to reunite the
  /// distributed sexts. For example,
  ///
  ///                              &a[sext(i +nsw (j +nsw 5))]
  ///   => distribute              &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///   => constant extraction     &a[sext(i) + sext(j)] + 5
  ///   => reunion                 &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(
      ExprKey Key, Instruction *Dominatee,
      DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);

  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *v, Loop *L);

  // Swaps the index operands of two GEPs.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);

  // Checks whether it is safe to swap the operands of two GEPs.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI;
  TargetLibraryInfo *TLI;
  // Retrieved lazily since not always used.
  function_ref<TargetTransformInfo &(Function &)> GetTTI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations or
  /// multiple GEPs with a single index.
  bool LowerGEP;

  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
};

} // end anonymous namespace

char SeparateConstOffsetFromGEPLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add".
  // This is the case if the or's disjoint flag is set.
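  // For example, (a | 3) equals (a + 3) when the two low bits of a are known
  // to be zero, which is exactly what the disjoint flag guarantees.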
  if (BO->getOpcode() == Instruction::Or &&
      !cast<PossiblyDisjointInst>(BO)->isDisjoint())
    return false;

  // FIXME: We don't currently support constants from the RHS of subs,
  // when we are zero-extended, because we need a way to zero-extend
  // them before they are negated.
  if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //       0       |      0       | true because no s/zext exists
  //       0       |      1       | zext(BO) == zext(A) op zext(B)
  //       1       |      0       | sext(BO) == sext(A) op sext(B)
  //       1       |      1       | zext(sext(BO)) ==
  //               |              |     zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbound GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // Save off the current height of the chain, in case we need to restore it.
  size_t ChainLength = UserChain.size();

  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;

  // Reset the chain back to where it was when we started exploring this node,
  // since visiting the LHS didn't pan out.
  UserChain.resize(ChainLength);

  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If BO is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;

  // If the RHS wasn't a suitable candidate either, reset the chain again.
  if (ConstantOffset == 0)
    UserChain.resize(ChainLength);

  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  } else if (isa<TruncInst>(V)) {
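    // The offset found in the wider source operand is truncated to the width
    // of the trunc's result type.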
    ConstantOffset =
        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
            .trunc(BitWidth);
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in use-def order. Therefore, we apply them to V
  // in reverse order.
  for (CastInst *I : llvm::reverse(ExtInsts)) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // Try to constant fold the cast.
      Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL);
      if (Current)
        continue;
    }

    Instruction *Ext = I->clone();
    Ext->setOperand(0, Current);
    Ext->insertBefore(*IP->getParent(), IP);
    Current = Ext;
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
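  // (distributeExtsAndCloneChain records each cast in ExtInsts and sets its
  // UserChain slot to nullptr, so compact the chain here.)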
  unsigned NewSize = 0;
  for (User *I : UserChain) {
    if (I != nullptr) {
      UserChain[NewSize] = I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert(
        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
        "Only following instructions can be traced: sext, zext & trunc");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperators and CastInsts.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

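  // Clone BO with the traced operand replaced by the rebuilt subexpression,
  // preserving the original operand order.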
  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  assert((BO->use_empty() || BO->hasOneUse()) &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
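  // (A sub whose LHS became 0 cannot be dropped, since 0 - TheOther is not
  // TheOther.)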
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    NewOp = Instruction::Add;
  }

  BinaryOperator *NewBO;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  } else {
    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  }
  NewBO->takeName(BO);
  return NewBO;
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
                                        User *&UserChainTail) {
  ConstantOffsetExtractor Extractor(GEP->getIterator());
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset == 0) {
    UserChainTail = nullptr;
    return nullptr;
  }
  // Separates the constant offset from the GEP index.
  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  UserChainTail = Extractor.UserChain.back();
  return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
  // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
  return ConstantOffsetExtractor(GEP->getIterator())
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *PtrIdxTy = DL->getIndexType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices which must be i32.
    if (GTI.isSequential()) {
      if ((*I)->getType() != PtrIdxTy) {
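        // Sign-extend or truncate the index to the pointer index width (the
        // 'true' argument requests a signed cast).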
        *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom",
                                         GEP->getIterator());
        Changed = true;
      }
    }
  }
  return Changed;
}

int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Constant offsets of scalable types are not really constant.
      if (GTI.getIndexedType()->isScalableTy())
        continue;

      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
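        // For example, a constant 3 extracted from the index of an i32 array
        // contributes 3 * 4 = 12 bytes here.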
        AccumulativeByteOffset +=
            ConstantOffset * GTI.getSequentialElementStride(*DL);
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
      // Skip field 0 as the offset is always 0.
      if (Field != 0) {
        NeedsExtraction = true;
        AccumulativeByteOffset +=
            DL->getStructLayout(StTy)->getElementOffset(Field);
      }
    }
  }
  return AccumulativeByteOffset;
}

void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *PtrIndexTy = DL->getIndexType(Variadic->getType());

  Value *ResultPtr = Variadic->getOperand(0);
  Loop *L = LI->getLoopFor(Variadic->getParent());
  // The base is a swap candidate only if it is loop invariant and not used
  // more than once in the loop.
  bool isSwapCandidate =
      L && L->isLoopInvariant(ResultPtr) &&
      !hasMoreThanOneUseInLoop(ResultPtr, L);
  Value *FirstResult = nullptr;

  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create an ugly GEP for each sequential index. We don't create GEPs for
  // structure indices, as they are accumulated in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
                                GTI.getSequentialElementStride(*DL));
      // Scale the index by the element size.
8890b57cec5SDimitry Andric if (ElementSize != 1) {
8900b57cec5SDimitry Andric if (ElementSize.isPowerOf2()) {
8910b57cec5SDimitry Andric Idx = Builder.CreateShl(
89206c3fb27SDimitry Andric Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
8930b57cec5SDimitry Andric } else {
89406c3fb27SDimitry Andric Idx =
89506c3fb27SDimitry Andric Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
8960b57cec5SDimitry Andric }
8970b57cec5SDimitry Andric }
8980b57cec5SDimitry Andric // Create an ugly GEP with a single index for each index.
8997a6dacacSDimitry Andric ResultPtr = Builder.CreatePtrAdd(ResultPtr, Idx, "uglygep");
9000b57cec5SDimitry Andric if (FirstResult == nullptr)
9010b57cec5SDimitry Andric FirstResult = ResultPtr;
9020b57cec5SDimitry Andric }
9030b57cec5SDimitry Andric }
9040b57cec5SDimitry Andric
9050b57cec5SDimitry Andric // Create a GEP with the constant offset index.
9060b57cec5SDimitry Andric if (AccumulativeByteOffset != 0) {
90706c3fb27SDimitry Andric Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
9087a6dacacSDimitry Andric ResultPtr = Builder.CreatePtrAdd(ResultPtr, Offset, "uglygep");
9090b57cec5SDimitry Andric } else
9100b57cec5SDimitry Andric isSwapCandidate = false;
9110b57cec5SDimitry Andric
9120b57cec5SDimitry Andric // If we created a GEP with constant index, and the base is loop invariant,
9130b57cec5SDimitry Andric // then we swap the first one with it, so LICM can move constant GEP out
9140b57cec5SDimitry Andric // later.
915e8d8bef9SDimitry Andric auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
916e8d8bef9SDimitry Andric auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
9170b57cec5SDimitry Andric if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
9180b57cec5SDimitry Andric swapGEPOperand(FirstGEP, SecondGEP);
9190b57cec5SDimitry Andric
9200b57cec5SDimitry Andric Variadic->replaceAllUsesWith(ResultPtr);
9210b57cec5SDimitry Andric Variadic->eraseFromParent();
9220b57cec5SDimitry Andric }
9230b57cec5SDimitry Andric
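// Lowers a variadic GEP into explicit integer arithmetic: the base pointer is
// converted with ptrtoint, each sequential index is scaled and added, the
// constant offset is added last, and the sum is converted back with inttoptr.
// A minimal sketch with invented names, assuming a 64-bit index type and a
// 4-byte element:
//
//   %base.int = ptrtoint ptr %base to i64
//   %s0       = shl i64 %i, 2
//   %sum0     = add i64 %base.int, %s0
//   %sum1     = add i64 %sum0, <AccumulativeByteOffset>
//   %p        = inttoptr i64 %sum1 to ptr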
void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                               int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
  assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
         "Pointer type must match index type for arithmetic-based lowering of "
         "split GEPs");

  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  // don't create arithmetics for structure indices, as they are accumulated
  // in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                GTI.getSequentialElementStride(*DL));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ADD for each index.
      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
    }
  }

  // Create an ADD for the constant offset index.
  if (AccumulativeByteOffset != 0) {
    ResultPtr = Builder.CreateAdd(
        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  }

  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

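// Reorders a chain of two GEPs so that the GEP whose indices carry the
// constant offset is applied last, where it can fold into a reg+immediate
// addressing mode. Illustrative sketch (invented IR, both levels indexing
// float elements):
//
//   %inner = getelementptr float, ptr %p, i64 12      ; constant offset
//   %outer = getelementptr float, ptr %inner, i64 %i  ; variable index
//
// is rewritten, when legal for the target, to roughly
//
//   %inner.new = getelementptr float, ptr %p, i64 %i
//   %outer.new = getelementptr float, ptr %inner.new, i64 12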
bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
                                            TargetTransformInfo &TTI) {
  auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
  if (!PtrGEP)
    return false;

  bool NestedNeedsExtraction;
  int64_t NestedByteOffset =
      accumulateByteOffset(PtrGEP, NestedNeedsExtraction);
  if (!NestedNeedsExtraction)
    return false;

  unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
  if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                 /*BaseGV=*/nullptr, NestedByteOffset,
                                 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
    return false;

  bool GEPInBounds = GEP->isInBounds();
  bool PtrGEPInBounds = PtrGEP->isInBounds();
  bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
  if (IsChainInBounds) {
    auto IsKnownNonNegative = [this](Value *V) {
      return isKnownNonNegative(V, *DL);
    };
    IsChainInBounds &= all_of(GEP->indices(), IsKnownNonNegative);
    if (IsChainInBounds)
      IsChainInBounds &= all_of(PtrGEP->indices(), IsKnownNonNegative);
  }

  IRBuilder<> Builder(GEP);
  // For trivial GEP chains, we can swap the indices.
  Value *NewSrc = Builder.CreateGEP(
      GEP->getSourceElementType(), PtrGEP->getPointerOperand(),
      SmallVector<Value *, 4>(GEP->indices()), "", IsChainInBounds);
  Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc,
                                    SmallVector<Value *, 4>(PtrGEP->indices()),
                                    "", IsChainInBounds);
  GEP->replaceAllUsesWith(NewGEP);
  RecursivelyDeleteTriviallyDeadInstructions(GEP);
  return true;
}

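// Splits a GEP with partially constant indices into a variadic base plus a
// constant byte offset. A minimal sketch with invented names, for LowerGEP
// disabled and a target whose addressing mode accepts the immediate:
//
//   %p = getelementptr inbounds [64 x i32], ptr %a, i64 0, i64 %add
//        ; where %add = add i64 %i, 3
//
// becomes roughly
//
//   %base = getelementptr [64 x i32], ptr %a, i64 0, i64 %i
//   %p    = getelementptr inbounds i8, ptr %base, i64 12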
bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());

  if (!NeedsExtraction) {
    Changed |= reorderGEP(GEP, TTI);
    return Changed;
  }

  // If LowerGEP is disabled, check whether the backend supports the addressing
  // mode we are about to produce before really splitting the GEP; if not, the
  // splitting probably won't be beneficial.
  // If LowerGEP is enabled, we can still optimize the lowered parts of the
  // variable indices even when the extracted constant offset does not match an
  // addressing mode, so we don't check for addressing modes in that case.
  if (!LowerGEP) {
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/0,
                                   AddrSpace)) {
      return Changed;
    }
  }

  // Remove the constant offset in each sequential index. The resultant GEP
  // computes the variadic base.
  // Notice that we don't remove struct field indices here. If LowerGEP is
  // disabled, a structure index is not accumulated and we still use the old
  // one. If LowerGEP is enabled, a structure index is accumulated in the
  // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
  // handle the constant offset and won't need a new structure index.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Constant offsets of scalable types are not really constant.
      if (GTI.getIndexedType()->isScalableTy())
        continue;

      // Splits this GEP index into a variadic part and a constant offset, and
      // uses the variadic part as the new index.
      Value *OldIdx = GEP->getOperand(I);
      User *UserChainTail;
      Value *NewIdx =
          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
      if (NewIdx != nullptr) {
        // Switches to the index with the constant offset removed.
        GEP->setOperand(I, NewIdx);
        // After switching to the new index, we can garbage-collect UserChain
        // and the old index if they are not used.
        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
      }
    }
  }

  // Clear the inbounds attribute because the new index may be off-bound.
  // e.g.,
  //
  //   b     = add i64 a, 5
  //   addr  = gep inbounds float, float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float, float* p, i64 a ; inbounds removed
  //   addr  = gep inbounds float, float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  // TODO(gep_nowrap): Preserve nuw at least.
  bool GEPWasInBounds = GEP->isInBounds();
  GEP->setNoWrapFlags(GEPNoWrapFlags::none());

  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  if (LowerGEP) {
    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
    // arithmetic operations if the target uses alias analysis in codegen.
    // Additionally, pointers that aren't integral (and so can't be safely
    // converted to integers) or those whose offset size is different from
    // their pointer size (which means that doing integer arithmetic on them
    // could affect that data) can't be lowered in this way.
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
                               DL->getIndexSizeInBits(AddrSpace);
    if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
        PointerHasExtraData)
      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
    else
      lowerToArithmetics(GEP, AccumulativeByteOffset);
    return true;
  }

  // No need to create another GEP if the accumulative byte offset is 0.
  if (AccumulativeByteOffset == 0)
    return true;

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep                        ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep i8, %gep2, %offset
  //   %gep                        ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep i8, %gep2, %offset
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  Type *PtrIdxTy = DL->getIndexType(GEP->getType());
  IRBuilder<> Builder(GEP);
  NewGEP = cast<Instruction>(Builder.CreatePtrAdd(
      NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true),
      GEP->getName(), GEPWasInBounds));
  NewGEP->copyMetadata(*GEP);

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}


bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;
  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
    return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  };
  SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
  return Impl.run(F);
}

bool SeparateConstOffsetFromGEP::run(Function &F) {
  if (DisableSeparateConstOffsetFromGEP)
    return false;

  DL = &F.getDataLayout();
  bool Changed = false;
  for (BasicBlock &B : F) {
    if (!DT->isReachableFromEntry(&B))
      continue;

    for (Instruction &I : llvm::make_early_inc_range(B))
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
        Changed |= splitGEP(GEP);
    // No need to split GEP ConstantExprs because all their indices are
    // constant already.
  }

  Changed |= reuniteExts(F);

  if (VerifyNoDeadCode)
    verifyNoDeadCode(F);

  return Changed;
}

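// Finds, among the recorded expressions with the same key, the closest one
// that dominates \p Dominatee. Candidates are pushed in dominator-tree
// pre-order, so they form a stack: an entry that fails to dominate the current
// instruction can never dominate a later one and is popped for good.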
Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
    ExprKey Key, Instruction *Dominatee,
    DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {
  auto Pos = DominatingExprs.find(Key);
  if (Pos == DominatingExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    Instruction *Candidate = Candidates.back();
    if (DT->dominates(Candidate, Dominatee))
      return Candidate;
    Candidates.pop_back();
  }
  return nullptr;
}

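// Reunites sign-extended operands when a dominating no-signed-wrap expression
// over the narrow values is available. Illustrative sketch with invented
// names:
//
//   %dom = add nsw i32 %a, %b        ; dominates %i and cannot sign-overflow
//   ...
//   %sa  = sext i32 %a to i64
//   %sb  = sext i32 %b to i64
//   %i   = add i64 %sa, %sb
//
// is simplified to
//
//   %i   = sext i32 %dom to i64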
bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
  if (!I->getType()->isIntOrIntVectorTy())
    return false;

  // Dom: LHS+RHS
  // I: sext(LHS)+sext(RHS)
  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
  // TODO: handle zext
  Value *LHS = nullptr, *RHS = nullptr;
  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
        Instruction *NewSExt =
            new SExtInst(Dom, I->getType(), "", I->getIterator());
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        NewSExt->setDebugLoc(I->getDebugLoc());
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      if (auto *Dom =
              findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
        Instruction *NewSExt =
            new SExtInst(Dom, I->getType(), "", I->getIterator());
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        NewSExt->setDebugLoc(I->getDebugLoc());
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  }

  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfPoison(I)) {
      ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
      DominatingAdds[Key].push_back(I);
    }
  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfPoison(I))
      DominatingSubs[{LHS, RHS}].push_back(I);
  }
  return false;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
  bool Changed = false;
  DominatingAdds.clear();
  DominatingSubs.clear();
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (Instruction &I : llvm::make_early_inc_range(*BB))
      Changed |= reuniteExts(&I);
  }
  return Changed;
}

void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isInstructionTriviallyDead(&I)) {
        std::string ErrMessage;
        raw_string_ostream RSO(ErrMessage);
        RSO << "Dead instruction detected!\n" << I << "\n";
        llvm_unreachable(RSO.str().c_str());
      }
    }
  }
}

bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
  if (!FirstGEP || !FirstGEP->hasOneUse())
    return false;

  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
    return false;

  if (FirstGEP == SecondGEP)
    return false;

  unsigned FirstNum = FirstGEP->getNumOperands();
  unsigned SecondNum = SecondGEP->getNumOperands();
  // Give up if the number of operands is not 2.
  if (FirstNum != SecondNum || FirstNum != 2)
    return false;

  Value *FirstBase = FirstGEP->getOperand(0);
  Value *SecondBase = SecondGEP->getOperand(0);
  Value *FirstOffset = FirstGEP->getOperand(1);
  // Give up if the index of the first GEP is loop invariant.
  if (CurLoop->isLoopInvariant(FirstOffset))
    return false;

  // Give up if the bases don't have the same type.
  if (FirstBase->getType() != SecondBase->getType())
    return false;

  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);

  // Check if the second operand of the first GEP has a constant coefficient.
  // For example, for the following code, we won't gain anything by hoisting
  // the second GEP out because the second GEP can be folded away.
  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
  //   %67 = shl i64 %scevgep.sum.ur159, 2
  //   %uglygep160 = getelementptr i8* %65, i64 %67
  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

  // Skip a constant shift instruction, which may have been generated by
  // splitting GEPs.
  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));

  // Give up if FirstOffsetDef is an Add or Sub with a constant, because the
  // swap may not be profitable at all due to constant folding.
  if (FirstOffsetDef)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
      unsigned opc = BO->getOpcode();
      if ((opc == Instruction::Add || opc == Instruction::Sub) &&
          (isa<ConstantInt>(BO->getOperand(0)) ||
           isa<ConstantInt>(BO->getOperand(1))))
        return false;
    }
  return true;
}

bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
  int UsesInLoop = 0;
  for (User *U : V->users()) {
    if (Instruction *User = dyn_cast<Instruction>(U))
      if (L->contains(User))
        if (++UsesInLoop > 1)
          return true;
  }
  return false;
}

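// Swaps the single index operands of two chained GEPs. Illustrative sketch
// with invented IR: moving the constant up makes the first GEP loop invariant
// so that LICM can hoist it:
//
//   %g1 = getelementptr i8, ptr %p, i64 %o      ; loop-varying offset
//   %g2 = getelementptr i8, ptr %g1, i64 256    ; constant offset
//
// becomes
//
//   %g1 = getelementptr i8, ptr %p, i64 256     ; loop-invariant, hoistable
//   %g2 = getelementptr i8, ptr %g1, i64 %o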
void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
                                                GetElementPtrInst *Second) {
  Value *Offset1 = First->getOperand(1);
  Value *Offset2 = Second->getOperand(1);
  First->setOperand(1, Offset2);
  Second->setOperand(1, Offset1);

  // We changed p+o+c to p+c+o, so p+c may no longer be in bounds.
  const DataLayout &DAL = First->getDataLayout();
  APInt Offset(DAL.getIndexSizeInBits(
                   cast<PointerType>(First->getType())->getAddressSpace()),
               0);
  Value *NewBase =
      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
  uint64_t ObjectSize;
  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
      Offset.ugt(ObjectSize)) {
    // TODO(gep_nowrap): Make flag preservation more precise.
    First->setNoWrapFlags(GEPNoWrapFlags::none());
    Second->setNoWrapFlags(GEPNoWrapFlags::none());
  } else
    First->setIsInBounds(true);
}

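// Prints the pass parameters in textual pipeline form; for example, when
// LowerGEP is set this appends "<lower-gep>" after the pass name.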
void SeparateConstOffsetFromGEPPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
      ->printPipeline(OS, MapClassName2PassName);
  OS << '<';
  if (LowerGEP)
    OS << "lower-gep";
  OS << '>';
}

PreservedAnalyses
SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *LI = &AM.getResult<LoopAnalysis>(F);
  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
  auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
    return AM.getResult<TargetIRAnalysis>(F);
  };
  SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
  if (!Impl.run(F))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}