1e8d8bef9SDimitry Andric //===- HexagonVectorLoopCarriedReuse.h ------------------------------------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric // This pass removes the computation of provably redundant expressions that have 10e8d8bef9SDimitry Andric // been computed earlier in a previous iteration. It relies on the use of PHIs 11e8d8bef9SDimitry Andric // to identify loop carried dependences. This is scalar replacement for vector 12e8d8bef9SDimitry Andric // types. 13e8d8bef9SDimitry Andric // 14e8d8bef9SDimitry Andric //----------------------------------------------------------------------------- 15e8d8bef9SDimitry Andric // Motivation: Consider the case where we have the following loop structure. 16e8d8bef9SDimitry Andric // 17e8d8bef9SDimitry Andric // Loop: 18e8d8bef9SDimitry Andric // t0 = a[i]; 19e8d8bef9SDimitry Andric // t1 = f(t0); 20e8d8bef9SDimitry Andric // t2 = g(t1); 21e8d8bef9SDimitry Andric // ... 22e8d8bef9SDimitry Andric // t3 = a[i+1]; 23e8d8bef9SDimitry Andric // t4 = f(t3); 24e8d8bef9SDimitry Andric // t5 = g(t4); 25e8d8bef9SDimitry Andric // t6 = op(t2, t5) 26e8d8bef9SDimitry Andric // cond_branch <Loop> 27e8d8bef9SDimitry Andric // 28e8d8bef9SDimitry Andric // This can be converted to 29e8d8bef9SDimitry Andric // t00 = a[0]; 30e8d8bef9SDimitry Andric // t10 = f(t00); 31e8d8bef9SDimitry Andric // t20 = g(t10); 32e8d8bef9SDimitry Andric // Loop: 33e8d8bef9SDimitry Andric // t2 = t20; 34e8d8bef9SDimitry Andric // t3 = a[i+1]; 35e8d8bef9SDimitry Andric // t4 = f(t3); 36e8d8bef9SDimitry Andric // t5 = g(t4); 37e8d8bef9SDimitry Andric // t6 = op(t2, t5) 38e8d8bef9SDimitry Andric // t20 = t5 39e8d8bef9SDimitry Andric // cond_branch <Loop> 40e8d8bef9SDimitry Andric // 41e8d8bef9SDimitry Andric // SROA does a good job of reusing a[i+1] as a[i] in the next iteration. 42e8d8bef9SDimitry Andric // Such a loop comes to this pass in the following form. 43e8d8bef9SDimitry Andric // 44e8d8bef9SDimitry Andric // LoopPreheader: 45e8d8bef9SDimitry Andric // X0 = a[0]; 46e8d8bef9SDimitry Andric // Loop: 47e8d8bef9SDimitry Andric // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> 48e8d8bef9SDimitry Andric // t1 = f(X2) <-- I1 49e8d8bef9SDimitry Andric // t2 = g(t1) 50e8d8bef9SDimitry Andric // ... 51e8d8bef9SDimitry Andric // X1 = a[i+1] 52e8d8bef9SDimitry Andric // t4 = f(X1) <-- I2 53e8d8bef9SDimitry Andric // t5 = g(t4) 54e8d8bef9SDimitry Andric // t6 = op(t2, t5) 55e8d8bef9SDimitry Andric // cond_branch <Loop> 56e8d8bef9SDimitry Andric // 57e8d8bef9SDimitry Andric // In this pass, we look for PHIs such as X2 whose incoming values come only 58e8d8bef9SDimitry Andric // from the Loop Preheader and over the backedge and additionaly, both these 59e8d8bef9SDimitry Andric // values are the results of the same operation in terms of opcode. We call such 60e8d8bef9SDimitry Andric // a PHI node a dependence chain or DepChain. In this case, the dependence of X2 61e8d8bef9SDimitry Andric // over X1 is carried over only one iteration and so the DepChain is only one 62e8d8bef9SDimitry Andric // PHI node long. 63e8d8bef9SDimitry Andric // 64e8d8bef9SDimitry Andric // Then, we traverse the uses of the PHI (X2) and the uses of the value of the 65e8d8bef9SDimitry Andric // PHI coming over the backedge (X1). We stop at the first pair of such users 66e8d8bef9SDimitry Andric // I1 (of X2) and I2 (of X1) that meet the following conditions. 67e8d8bef9SDimitry Andric // 1. I1 and I2 are the same operation, but with different operands. 68e8d8bef9SDimitry Andric // 2. X2 and X1 are used at the same operand number in the two instructions. 69e8d8bef9SDimitry Andric // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a 70e8d8bef9SDimitry Andric // a DepChain from Op1 to Op2 of the same length as that between X2 and X1. 71e8d8bef9SDimitry Andric // 72e8d8bef9SDimitry Andric // We then make the following transformation 73e8d8bef9SDimitry Andric // LoopPreheader: 74e8d8bef9SDimitry Andric // X0 = a[0]; 75e8d8bef9SDimitry Andric // Y0 = f(X0); 76e8d8bef9SDimitry Andric // Loop: 77e8d8bef9SDimitry Andric // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> 78e8d8bef9SDimitry Andric // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> 79e8d8bef9SDimitry Andric // t1 = f(X2) <-- Will be removed by DCE. 80e8d8bef9SDimitry Andric // t2 = g(Y2) 81e8d8bef9SDimitry Andric // ... 82e8d8bef9SDimitry Andric // X1 = a[i+1] 83e8d8bef9SDimitry Andric // t4 = f(X1) 84e8d8bef9SDimitry Andric // t5 = g(t4) 85e8d8bef9SDimitry Andric // t6 = op(t2, t5) 86e8d8bef9SDimitry Andric // cond_branch <Loop> 87e8d8bef9SDimitry Andric // 88e8d8bef9SDimitry Andric // We proceed until we cannot find any more such instructions I1 and I2. 89e8d8bef9SDimitry Andric // 90e8d8bef9SDimitry Andric // --- DepChains & Loop carried dependences --- 91e8d8bef9SDimitry Andric // Consider a single basic block loop such as 92e8d8bef9SDimitry Andric // 93e8d8bef9SDimitry Andric // LoopPreheader: 94e8d8bef9SDimitry Andric // X0 = ... 95e8d8bef9SDimitry Andric // Y0 = ... 96e8d8bef9SDimitry Andric // Loop: 97e8d8bef9SDimitry Andric // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> 98e8d8bef9SDimitry Andric // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> 99e8d8bef9SDimitry Andric // ... 100e8d8bef9SDimitry Andric // X1 = ... 101e8d8bef9SDimitry Andric // ... 102e8d8bef9SDimitry Andric // cond_branch <Loop> 103e8d8bef9SDimitry Andric // 104e8d8bef9SDimitry Andric // Then there is a dependence between X2 and X1 that goes back one iteration, 105e8d8bef9SDimitry Andric // i.e. X1 is used as X2 in the very next iteration. We represent this as a 106e8d8bef9SDimitry Andric // DepChain from X2 to X1 (X2->X1). 107e8d8bef9SDimitry Andric // Similarly, there is a dependence between Y2 and X1 that goes back two 108e8d8bef9SDimitry Andric // iterations. X1 is used as Y2 two iterations after it is computed. This is 109e8d8bef9SDimitry Andric // represented by a DepChain as (Y2->X2->X1). 110e8d8bef9SDimitry Andric // 111e8d8bef9SDimitry Andric // A DepChain has the following properties. 112e8d8bef9SDimitry Andric // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of 113e8d8bef9SDimitry Andric // iterations of carried dependence + 1. 114e8d8bef9SDimitry Andric // 2. All instructions in the DepChain except the last are PHIs. 115e8d8bef9SDimitry Andric // 116e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 117e8d8bef9SDimitry Andric 118e8d8bef9SDimitry Andric #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H 119e8d8bef9SDimitry Andric #define LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H 120e8d8bef9SDimitry Andric 121e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h" 122e8d8bef9SDimitry Andric 123e8d8bef9SDimitry Andric namespace llvm { 124e8d8bef9SDimitry Andric 125e8d8bef9SDimitry Andric class Loop; 126e8d8bef9SDimitry Andric 127e8d8bef9SDimitry Andric /// Hexagon Vector Loop Carried Reuse Pass 128e8d8bef9SDimitry Andric struct HexagonVectorLoopCarriedReusePass 129e8d8bef9SDimitry Andric : public PassInfoMixin<HexagonVectorLoopCarriedReusePass> { 130*81ad6265SDimitry Andric HexagonVectorLoopCarriedReusePass() = default; 131e8d8bef9SDimitry Andric 132e8d8bef9SDimitry Andric /// Run pass over the Loop. 133e8d8bef9SDimitry Andric PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, 134e8d8bef9SDimitry Andric LoopStandardAnalysisResults &AR, LPMUpdater &U); 135e8d8bef9SDimitry Andric }; 136e8d8bef9SDimitry Andric 137e8d8bef9SDimitry Andric } // end namespace llvm 138e8d8bef9SDimitry Andric 139e8d8bef9SDimitry Andric #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H 140