1 //===- InstCombineCasts.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visit functions for cast operations.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/Analysis/ConstantFolding.h"
16 #include "llvm/IR/DataLayout.h"
17 #include "llvm/IR/DebugInfo.h"
18 #include "llvm/IR/PatternMatch.h"
19 #include "llvm/Support/KnownBits.h"
20 #include "llvm/Transforms/InstCombine/InstCombiner.h"
21 #include <optional>
22
23 using namespace llvm;
24 using namespace PatternMatch;
25
26 #define DEBUG_TYPE "instcombine"
27
28 /// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
29 /// true for, actually insert the code to evaluate the expression.
EvaluateInDifferentType(Value * V,Type * Ty,bool isSigned)30 Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
31 bool isSigned) {
32 if (Constant *C = dyn_cast<Constant>(V))
33 return ConstantFoldIntegerCast(C, Ty, isSigned, DL);
34
35 // Otherwise, it must be an instruction.
36 Instruction *I = cast<Instruction>(V);
37 Instruction *Res = nullptr;
38 unsigned Opc = I->getOpcode();
39 switch (Opc) {
40 case Instruction::Add:
41 case Instruction::Sub:
42 case Instruction::Mul:
43 case Instruction::And:
44 case Instruction::Or:
45 case Instruction::Xor:
46 case Instruction::AShr:
47 case Instruction::LShr:
48 case Instruction::Shl:
49 case Instruction::UDiv:
50 case Instruction::URem: {
51 Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
52 Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
53 Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
54 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
55 Res->setIsExact(I->isExact());
56 break;
57 }
58 case Instruction::Trunc:
59 case Instruction::ZExt:
60 case Instruction::SExt:
61 // If the source type of the cast is the type we're trying for then we can
62 // just return the source. There's no need to insert it because it is not
63 // new.
64 if (I->getOperand(0)->getType() == Ty)
65 return I->getOperand(0);
66
67 // Otherwise, must be the same type of cast, so just reinsert a new one.
68 // This also handles the case of zext(trunc(x)) -> zext(x).
69 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
70 Opc == Instruction::SExt);
71 break;
72 case Instruction::Select: {
73 Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
74 Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
75 Res = SelectInst::Create(I->getOperand(0), True, False);
76 break;
77 }
78 case Instruction::PHI: {
79 PHINode *OPN = cast<PHINode>(I);
80 PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
81 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
82 Value *V =
83 EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
84 NPN->addIncoming(V, OPN->getIncomingBlock(i));
85 }
86 Res = NPN;
87 break;
88 }
89 case Instruction::FPToUI:
90 case Instruction::FPToSI:
91 Res = CastInst::Create(
92 static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
93 break;
94 case Instruction::Call:
95 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
96 switch (II->getIntrinsicID()) {
97 default:
98 llvm_unreachable("Unsupported call!");
99 case Intrinsic::vscale: {
100 Function *Fn = Intrinsic::getOrInsertDeclaration(
101 I->getModule(), Intrinsic::vscale, {Ty});
102 Res = CallInst::Create(Fn->getFunctionType(), Fn);
103 break;
104 }
105 }
106 }
107 break;
108 case Instruction::ShuffleVector: {
109 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
110 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
111 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
112 Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
113 Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
114 Res = new ShuffleVectorInst(Op0, Op1,
115 cast<ShuffleVectorInst>(I)->getShuffleMask());
116 break;
117 }
118 default:
119 // TODO: Can handle more cases here.
120 llvm_unreachable("Unreachable!");
121 }
122
123 Res->takeName(I);
124 return InsertNewInstWith(Res, I->getIterator());
125 }
126
127 Instruction::CastOps
isEliminableCastPair(const CastInst * CI1,const CastInst * CI2)128 InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
129 const CastInst *CI2) {
130 Type *SrcTy = CI1->getSrcTy();
131 Type *MidTy = CI1->getDestTy();
132 Type *DstTy = CI2->getDestTy();
133
134 Instruction::CastOps firstOp = CI1->getOpcode();
135 Instruction::CastOps secondOp = CI2->getOpcode();
136 Type *SrcIntPtrTy =
137 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
138 Type *MidIntPtrTy =
139 MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
140 Type *DstIntPtrTy =
141 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
142 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
143 DstTy, SrcIntPtrTy, MidIntPtrTy,
144 DstIntPtrTy);
145
146 // We don't want to form an inttoptr or ptrtoint that converts to an integer
147 // type that differs from the pointer size.
148 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
149 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
150 Res = 0;
151
152 return Instruction::CastOps(Res);
153 }
154
155 /// Implement the transforms common to all CastInst visitors.
commonCastTransforms(CastInst & CI)156 Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
157 Value *Src = CI.getOperand(0);
158 Type *Ty = CI.getType();
159
160 if (auto *SrcC = dyn_cast<Constant>(Src))
161 if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
162 return replaceInstUsesWith(CI, Res);
163
164 // Try to eliminate a cast of a cast.
165 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
166 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
167 // The first cast (CSrc) is eliminable so we need to fix up or replace
168 // the second cast (CI). CSrc will then have a good chance of being dead.
169 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
170 // Point debug users of the dying cast to the new one.
171 if (CSrc->hasOneUse())
172 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
173 return Res;
174 }
175 }
176
177 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
178 // We are casting a select. Try to fold the cast into the select if the
179 // select does not have a compare instruction with matching operand types
180 // or the select is likely better done in a narrow type.
181 // Creating a select with operands that are different sizes than its
182 // condition may inhibit other folds and lead to worse codegen.
183 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
184 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
185 (CI.getOpcode() == Instruction::Trunc &&
186 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
187
188 // If it's a bitcast involving vectors, make sure it has the same number
189 // of elements on both sides.
190 if (CI.getOpcode() != Instruction::BitCast ||
191 match(&CI, m_ElementWiseBitCast(m_Value()))) {
192 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
193 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
194 return NV;
195 }
196 }
197 }
198 }
199
200 // If we are casting a PHI, then fold the cast into the PHI.
201 if (auto *PN = dyn_cast<PHINode>(Src)) {
202 // Don't do this if it would create a PHI node with an illegal type from a
203 // legal type.
204 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
205 shouldChangeType(CI.getSrcTy(), CI.getType()))
206 if (Instruction *NV = foldOpIntoPhi(CI, PN))
207 return NV;
208 }
209
210 // Canonicalize a unary shuffle after the cast if neither operation changes
211 // the size or element size of the input vector.
212 // TODO: We could allow size-changing ops if that doesn't harm codegen.
213 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
214 Value *X;
215 ArrayRef<int> Mask;
216 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
217 // TODO: Allow scalable vectors?
218 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
219 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
220 if (SrcTy && DestTy &&
221 SrcTy->getNumElements() == DestTy->getNumElements() &&
222 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
223 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
224 return new ShuffleVectorInst(CastX, Mask);
225 }
226 }
227
228 return nullptr;
229 }
230
231 /// Constants and extensions/truncates from the destination type are always
232 /// free to be evaluated in that type. This is a helper for canEvaluate*.
canAlwaysEvaluateInType(Value * V,Type * Ty)233 static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
234 if (isa<Constant>(V))
235 return match(V, m_ImmConstant());
236
237 Value *X;
238 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
239 X->getType() == Ty)
240 return true;
241
242 return false;
243 }
244
245 /// Filter out values that we can not evaluate in the destination type for free.
246 /// This is a helper for canEvaluate*.
canNotEvaluateInType(Value * V,Type * Ty)247 static bool canNotEvaluateInType(Value *V, Type *Ty) {
248 if (!isa<Instruction>(V))
249 return true;
250 // We don't extend or shrink something that has multiple uses -- doing so
251 // would require duplicating the instruction which isn't profitable.
252 if (!V->hasOneUse())
253 return true;
254
255 return false;
256 }
257
258 /// Return true if we can evaluate the specified expression tree as type Ty
259 /// instead of its larger type, and arrive with the same value.
260 /// This is used by code that tries to eliminate truncates.
261 ///
262 /// Ty will always be a type smaller than V. We should return true if trunc(V)
263 /// can be computed by computing V in the smaller type. If V is an instruction,
264 /// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
265 /// makes sense if x and y can be efficiently truncated.
266 ///
267 /// This function works on both vectors and scalars.
268 ///
canEvaluateTruncated(Value * V,Type * Ty,InstCombinerImpl & IC,Instruction * CxtI)269 static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
270 Instruction *CxtI) {
271 if (canAlwaysEvaluateInType(V, Ty))
272 return true;
273 if (canNotEvaluateInType(V, Ty))
274 return false;
275
276 auto *I = cast<Instruction>(V);
277 Type *OrigTy = V->getType();
278 switch (I->getOpcode()) {
279 case Instruction::Add:
280 case Instruction::Sub:
281 case Instruction::Mul:
282 case Instruction::And:
283 case Instruction::Or:
284 case Instruction::Xor:
285 // These operators can all arbitrarily be extended or truncated.
286 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
287 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
288
289 case Instruction::UDiv:
290 case Instruction::URem: {
291 // UDiv and URem can be truncated if all the truncated bits are zero.
292 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
293 uint32_t BitWidth = Ty->getScalarSizeInBits();
294 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
295 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
296 // Do not preserve the original context instruction. Simplifying div/rem
297 // based on later context may introduce a trap.
298 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
299 IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
300 return canEvaluateTruncated(I->getOperand(0), Ty, IC, I) &&
301 canEvaluateTruncated(I->getOperand(1), Ty, IC, I);
302 }
303 break;
304 }
305 case Instruction::Shl: {
306 // If we are truncating the result of this SHL, and if it's a shift of an
307 // inrange amount, we can always perform a SHL in a smaller type.
308 uint32_t BitWidth = Ty->getScalarSizeInBits();
309 KnownBits AmtKnownBits =
310 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
311 if (AmtKnownBits.getMaxValue().ult(BitWidth))
312 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
313 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
314 break;
315 }
316 case Instruction::LShr: {
317 // If this is a truncate of a logical shr, we can truncate it to a smaller
318 // lshr iff we know that the bits we would otherwise be shifting in are
319 // already zeros.
320 // TODO: It is enough to check that the bits we would be shifting in are
321 // zero - use AmtKnownBits.getMaxValue().
322 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
323 uint32_t BitWidth = Ty->getScalarSizeInBits();
324 KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
325 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
326 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
327 if (MaxShiftAmt.ult(BitWidth)) {
328 // If the only user is a trunc then we can narrow the shift if any new
329 // MSBs are not going to be used.
330 if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
331 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
332 if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
333 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
334 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
335 }
336 if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
337 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
338 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
339 }
340 break;
341 }
342 case Instruction::AShr: {
343 // If this is a truncate of an arithmetic shr, we can truncate it to a
344 // smaller ashr iff we know that all the bits from the sign bit of the
345 // original type and the sign bit of the truncate type are similar.
346 // TODO: It is enough to check that the bits we would be shifting in are
347 // similar to sign bit of the truncate type.
348 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
349 uint32_t BitWidth = Ty->getScalarSizeInBits();
350 KnownBits AmtKnownBits =
351 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
352 unsigned ShiftedBits = OrigBitWidth - BitWidth;
353 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
354 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
355 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
356 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
357 break;
358 }
359 case Instruction::Trunc:
360 // trunc(trunc(x)) -> trunc(x)
361 return true;
362 case Instruction::ZExt:
363 case Instruction::SExt:
364 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
365 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
366 return true;
367 case Instruction::Select: {
368 SelectInst *SI = cast<SelectInst>(I);
369 return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
370 canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
371 }
372 case Instruction::PHI: {
373 // We can change a phi if we can change all operands. Note that we never
374 // get into trouble with cyclic PHIs here because we only consider
375 // instructions with a single use.
376 PHINode *PN = cast<PHINode>(I);
377 for (Value *IncValue : PN->incoming_values())
378 if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
379 return false;
380 return true;
381 }
382 case Instruction::FPToUI:
383 case Instruction::FPToSI: {
384 // If the integer type can hold the max FP value, it is safe to cast
385 // directly to that type. Otherwise, we may create poison via overflow
386 // that did not exist in the original code.
387 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
388 const fltSemantics &Semantics = InputTy->getFltSemantics();
389 uint32_t MinBitWidth =
390 APFloatBase::semanticsIntSizeInBits(Semantics,
391 I->getOpcode() == Instruction::FPToSI);
392 return Ty->getScalarSizeInBits() >= MinBitWidth;
393 }
394 case Instruction::ShuffleVector:
395 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
396 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
397 default:
398 // TODO: Can handle more cases here.
399 break;
400 }
401
402 return false;
403 }
404
405 /// Given a vector that is bitcast to an integer, optionally logically
406 /// right-shifted, and truncated, convert it to an extractelement.
407 /// Example (big endian):
408 /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
409 /// --->
410 /// extractelement <4 x i32> %X, 1
foldVecTruncToExtElt(TruncInst & Trunc,InstCombinerImpl & IC)411 static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
412 InstCombinerImpl &IC) {
413 Value *TruncOp = Trunc.getOperand(0);
414 Type *DestType = Trunc.getType();
415 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
416 return nullptr;
417
418 Value *VecInput = nullptr;
419 ConstantInt *ShiftVal = nullptr;
420 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
421 m_LShr(m_BitCast(m_Value(VecInput)),
422 m_ConstantInt(ShiftVal)))) ||
423 !isa<VectorType>(VecInput->getType()))
424 return nullptr;
425
426 VectorType *VecType = cast<VectorType>(VecInput->getType());
427 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
428 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
429 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
430
431 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
432 return nullptr;
433
434 // If the element type of the vector doesn't match the result type,
435 // bitcast it to a vector type that we can extract from.
436 unsigned NumVecElts = VecWidth / DestWidth;
437 if (VecType->getElementType() != DestType) {
438 VecType = FixedVectorType::get(DestType, NumVecElts);
439 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
440 }
441
442 unsigned Elt = ShiftAmount / DestWidth;
443 if (IC.getDataLayout().isBigEndian())
444 Elt = NumVecElts - 1 - Elt;
445
446 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
447 }
448
449 /// Whenever an element is extracted from a vector, optionally shifted down, and
450 /// then truncated, canonicalize by converting it to a bitcast followed by an
451 /// extractelement.
452 ///
453 /// Examples (little endian):
454 /// trunc (extractelement <4 x i64> %X, 0) to i32
455 /// --->
456 /// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
457 ///
458 /// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
459 /// --->
460 /// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
foldVecExtTruncToExtElt(TruncInst & Trunc,InstCombinerImpl & IC)461 static Instruction *foldVecExtTruncToExtElt(TruncInst &Trunc,
462 InstCombinerImpl &IC) {
463 Value *Src = Trunc.getOperand(0);
464 Type *SrcType = Src->getType();
465 Type *DstType = Trunc.getType();
466
467 // Only attempt this if we have simple aliasing of the vector elements.
468 // A badly fit destination size would result in an invalid cast.
469 unsigned SrcBits = SrcType->getScalarSizeInBits();
470 unsigned DstBits = DstType->getScalarSizeInBits();
471 unsigned TruncRatio = SrcBits / DstBits;
472 if ((SrcBits % DstBits) != 0)
473 return nullptr;
474
475 Value *VecOp;
476 ConstantInt *Cst;
477 const APInt *ShiftAmount = nullptr;
478 if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
479 !match(Src,
480 m_OneUse(m_LShr(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)),
481 m_APInt(ShiftAmount)))))
482 return nullptr;
483
484 auto *VecOpTy = cast<VectorType>(VecOp->getType());
485 auto VecElts = VecOpTy->getElementCount();
486
487 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
488 uint64_t VecOpIdx = Cst->getZExtValue();
489 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
490 ? (VecOpIdx + 1) * TruncRatio - 1
491 : VecOpIdx * TruncRatio;
492
493 // Adjust index by the whole number of truncated elements.
494 if (ShiftAmount) {
495 // Check shift amount is in range and shifts a whole number of truncated
496 // elements.
497 if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
498 return nullptr;
499
500 uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
501 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
502 : (NewIdx + IdxOfs);
503 }
504
505 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
506 NewIdx <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");
507
508 auto *BitCastTo =
509 VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
510 Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
511 return ExtractElementInst::Create(BitCast, IC.Builder.getInt32(NewIdx));
512 }
513
514 /// Funnel/Rotate left/right may occur in a wider type than necessary because of
515 /// type promotion rules. Try to narrow the inputs and convert to funnel shift.
narrowFunnelShift(TruncInst & Trunc)516 Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
517 assert((isa<VectorType>(Trunc.getSrcTy()) ||
518 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
519 "Don't narrow to an illegal scalar type");
520
521 // Bail out on strange types. It is possible to handle some of these patterns
522 // even with non-power-of-2 sizes, but it is not a likely scenario.
523 Type *DestTy = Trunc.getType();
524 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
525 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
526 if (!isPowerOf2_32(NarrowWidth))
527 return nullptr;
528
529 // First, find an or'd pair of opposite shifts:
530 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
531 BinaryOperator *Or0, *Or1;
532 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
533 return nullptr;
534
535 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
536 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
537 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
538 Or0->getOpcode() == Or1->getOpcode())
539 return nullptr;
540
541 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
542 if (Or0->getOpcode() == BinaryOperator::LShr) {
543 std::swap(Or0, Or1);
544 std::swap(ShVal0, ShVal1);
545 std::swap(ShAmt0, ShAmt1);
546 }
547 assert(Or0->getOpcode() == BinaryOperator::Shl &&
548 Or1->getOpcode() == BinaryOperator::LShr &&
549 "Illegal or(shift,shift) pair");
550
551 // Match the shift amount operands for a funnel/rotate pattern. This always
552 // matches a subtraction on the R operand.
553 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
554 // The shift amounts may add up to the narrow bit width:
555 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
556 // If this is a funnel shift (different operands are shifted), then the
557 // shift amount can not over-shift (create poison) in the narrow type.
558 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
559 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
560 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
561 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
562 return L;
563
564 // The following patterns currently only work for rotation patterns.
565 // TODO: Add more general funnel-shift compatible patterns.
566 if (ShVal0 != ShVal1)
567 return nullptr;
568
569 // The shift amount may be masked with negation:
570 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
571 Value *X;
572 unsigned Mask = Width - 1;
573 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
574 match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
575 return X;
576
577 // Same as above, but the shift amount may be extended after masking:
578 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
579 match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
580 return X;
581
582 return nullptr;
583 };
584
585 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
586 bool IsFshl = true; // Sub on LSHR.
587 if (!ShAmt) {
588 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
589 IsFshl = false; // Sub on SHL.
590 }
591 if (!ShAmt)
592 return nullptr;
593
594 // The right-shifted value must have high zeros in the wide type (for example
595 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
596 // truncated, so those do not matter.
597 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
598 if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
599 return nullptr;
600
601 // Adjust the width of ShAmt for narrowed funnel shift operation:
602 // - Zero-extend if ShAmt is narrower than the destination type.
603 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
604 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
605 // zext/trunc(ShAmt)).
606 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
607
608 Value *X, *Y;
609 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
610 if (ShVal0 != ShVal1)
611 Y = Builder.CreateTrunc(ShVal1, DestTy);
612 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
613 Function *F =
614 Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
615 return CallInst::Create(F, {X, Y, NarrowShAmt});
616 }
617
618 /// Try to narrow the width of math or bitwise logic instructions by pulling a
619 /// truncate ahead of binary operators.
narrowBinOp(TruncInst & Trunc)620 Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
621 Type *SrcTy = Trunc.getSrcTy();
622 Type *DestTy = Trunc.getType();
623 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
624 unsigned DestWidth = DestTy->getScalarSizeInBits();
625
626 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
627 return nullptr;
628
629 BinaryOperator *BinOp;
630 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
631 return nullptr;
632
633 Value *BinOp0 = BinOp->getOperand(0);
634 Value *BinOp1 = BinOp->getOperand(1);
635 switch (BinOp->getOpcode()) {
636 case Instruction::And:
637 case Instruction::Or:
638 case Instruction::Xor:
639 case Instruction::Add:
640 case Instruction::Sub:
641 case Instruction::Mul: {
642 Constant *C;
643 if (match(BinOp0, m_Constant(C))) {
644 // trunc (binop C, X) --> binop (trunc C', X)
645 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
646 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
647 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
648 }
649 if (match(BinOp1, m_Constant(C))) {
650 // trunc (binop X, C) --> binop (trunc X, C')
651 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
652 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
653 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
654 }
655 Value *X;
656 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
657 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
658 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
659 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
660 }
661 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
662 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
663 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
664 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
665 }
666 break;
667 }
668 case Instruction::LShr:
669 case Instruction::AShr: {
670 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
671 Value *A;
672 Constant *C;
673 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
674 unsigned MaxShiftAmt = SrcWidth - DestWidth;
675 // If the shift is small enough, all zero/sign bits created by the shift
676 // are removed by the trunc.
677 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
678 APInt(SrcWidth, MaxShiftAmt)))) {
679 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
680 bool IsExact = OldShift->isExact();
681 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
682 /*IsSigned*/ true, DL)) {
683 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
684 Value *Shift =
685 OldShift->getOpcode() == Instruction::AShr
686 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
687 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
688 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
689 }
690 }
691 }
692 break;
693 }
694 default: break;
695 }
696
697 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
698 return NarrowOr;
699
700 return nullptr;
701 }
702
703 /// Try to narrow the width of a splat shuffle. This could be generalized to any
704 /// shuffle with a constant operand, but we limit the transform to avoid
705 /// creating a shuffle type that targets may not be able to lower effectively.
shrinkSplatShuffle(TruncInst & Trunc,InstCombiner::BuilderTy & Builder)706 static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
707 InstCombiner::BuilderTy &Builder) {
708 auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
709 if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
710 all_equal(Shuf->getShuffleMask()) &&
711 Shuf->getType() == Shuf->getOperand(0)->getType()) {
712 // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
713 // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
714 Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
715 return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
716 }
717
718 return nullptr;
719 }
720
721 /// Try to narrow the width of an insert element. This could be generalized for
722 /// any vector constant, but we limit the transform to insertion into undef to
723 /// avoid potential backend problems from unsupported insertion widths. This
724 /// could also be extended to handle the case of inserting a scalar constant
725 /// into a vector variable.
shrinkInsertElt(CastInst & Trunc,InstCombiner::BuilderTy & Builder)726 static Instruction *shrinkInsertElt(CastInst &Trunc,
727 InstCombiner::BuilderTy &Builder) {
728 Instruction::CastOps Opcode = Trunc.getOpcode();
729 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
730 "Unexpected instruction for shrinking");
731
732 auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
733 if (!InsElt || !InsElt->hasOneUse())
734 return nullptr;
735
736 Type *DestTy = Trunc.getType();
737 Type *DestScalarTy = DestTy->getScalarType();
738 Value *VecOp = InsElt->getOperand(0);
739 Value *ScalarOp = InsElt->getOperand(1);
740 Value *Index = InsElt->getOperand(2);
741
742 if (match(VecOp, m_Undef())) {
743 // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
744 // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
745 UndefValue *NarrowUndef = UndefValue::get(DestTy);
746 Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
747 return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
748 }
749
750 return nullptr;
751 }
752
visitTrunc(TruncInst & Trunc)753 Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
754 if (Instruction *Result = commonCastTransforms(Trunc))
755 return Result;
756
757 Value *Src = Trunc.getOperand(0);
758 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
759 unsigned DestWidth = DestTy->getScalarSizeInBits();
760 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
761
762 // Attempt to truncate the entire input expression tree to the destination
763 // type. Only do this if the dest type is a simple type, don't convert the
764 // expression tree to something weird like i93 unless the source is also
765 // strange.
766 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
767 canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
768
769 // If this cast is a truncate, evaluting in a different type always
770 // eliminates the cast, so it is always a win.
771 LLVM_DEBUG(
772 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
773 " to avoid cast: "
774 << Trunc << '\n');
775 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
776 assert(Res->getType() == DestTy);
777 return replaceInstUsesWith(Trunc, Res);
778 }
779
780 // For integer types, check if we can shorten the entire input expression to
781 // DestWidth * 2, which won't allow removing the truncate, but reducing the
782 // width may enable further optimizations, e.g. allowing for larger
783 // vectorization factors.
784 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
785 if (DestWidth * 2 < SrcWidth) {
786 auto *NewDestTy = DestITy->getExtendedType();
787 if (shouldChangeType(SrcTy, NewDestTy) &&
788 canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
789 LLVM_DEBUG(
790 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
791 " to reduce the width of operand of"
792 << Trunc << '\n');
793 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
794 return new TruncInst(Res, DestTy);
795 }
796 }
797 }
798
799 // See if we can simplify any instructions used by the input whose sole
800 // purpose is to compute bits we don't care about.
801 if (SimplifyDemandedInstructionBits(Trunc))
802 return &Trunc;
803
804 if (DestWidth == 1) {
805 Value *Zero = Constant::getNullValue(SrcTy);
806
807 Value *X;
808 const APInt *C1;
809 Constant *C2;
810 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
811 m_ImmConstant(C2))))) {
812 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
813 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
814 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
815 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
816 }
817
818 if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
819 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
820 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
821 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
822 }
823
824 Constant *C;
825 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
826 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
827 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
828 Value *MaskC = Builder.CreateShl(One, C);
829 Value *And = Builder.CreateAnd(X, MaskC);
830 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
831 }
832 if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
833 m_Deferred(X))))) {
834 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
835 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
836 Value *MaskC = Builder.CreateShl(One, C);
837 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
838 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
839 }
840
841 {
842 const APInt *C;
843 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
844 // trunc (C << X) to i1 --> X == 0, where C is odd
845 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
846 }
847 }
848
849 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
850 Value *X, *Y;
851 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
852 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
853 }
854 }
855
856 Value *A, *B;
857 Constant *C;
858 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
859 unsigned AWidth = A->getType()->getScalarSizeInBits();
860 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
861 auto *OldSh = cast<Instruction>(Src);
862 bool IsExact = OldSh->isExact();
863
864 // If the shift is small enough, all zero bits created by the shift are
865 // removed by the trunc.
866 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
867 APInt(SrcWidth, MaxShiftAmt)))) {
868 auto GetNewShAmt = [&](unsigned Width) {
869 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
870 Constant *Cmp =
871 ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
872 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
873 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
874 DL);
875 };
876
877 // trunc (lshr (sext A), C) --> ashr A, C
878 if (A->getType() == DestTy) {
879 Constant *ShAmt = GetNewShAmt(DestWidth);
880 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
881 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
882 : BinaryOperator::CreateAShr(A, ShAmt);
883 }
884 // The types are mismatched, so create a cast after shifting:
885 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
886 if (Src->hasOneUse()) {
887 Constant *ShAmt = GetNewShAmt(AWidth);
888 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
889 return CastInst::CreateIntegerCast(Shift, DestTy, true);
890 }
891 }
892 // TODO: Mask high bits with 'and'.
893 }
894
895 if (Instruction *I = narrowBinOp(Trunc))
896 return I;
897
898 if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
899 return I;
900
901 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
902 return I;
903
904 if (Src->hasOneUse() &&
905 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
906 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
907 // dest type is native and cst < dest size.
908 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
909 !match(A, m_Shr(m_Value(), m_Constant()))) {
910 // Skip shifts of shift by constants. It undoes a combine in
911 // FoldShiftByConstant and is the extend in reg pattern.
912 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
913 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
914 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
915 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
916 ConstantExpr::getTrunc(C, DestTy));
917 }
918 }
919 }
920
921 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
922 return I;
923
924 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
925 return I;
926
927 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
928 if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
929 m_Value(B))))) {
930 unsigned AWidth = A->getType()->getScalarSizeInBits();
931 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
932 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
933 Value *NarrowCtlz =
934 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
935 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
936 }
937 }
938
939 if (match(Src, m_VScale())) {
940 if (Trunc.getFunction() &&
941 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
942 Attribute Attr =
943 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
944 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
945 if (Log2_32(*MaxVScale) < DestWidth)
946 return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
947 }
948 }
949
950 if (DestWidth == 1 &&
951 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
952 isKnownNonZero(Src, SQ.getWithInstruction(&Trunc)))
953 return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));
954
955 bool Changed = false;
956 if (!Trunc.hasNoSignedWrap() &&
957 ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
958 Trunc.setHasNoSignedWrap(true);
959 Changed = true;
960 }
961 if (!Trunc.hasNoUnsignedWrap() &&
962 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
963 &Trunc)) {
964 Trunc.setHasNoUnsignedWrap(true);
965 Changed = true;
966 }
967
968 return Changed ? &Trunc : nullptr;
969 }
970
transformZExtICmp(ICmpInst * Cmp,ZExtInst & Zext)971 Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
972 ZExtInst &Zext) {
973 // If we are just checking for a icmp eq of a single bit and zext'ing it
974 // to an integer, then shift the bit to the appropriate place and then
975 // cast to integer to avoid the comparison.
976
977 // FIXME: This set of transforms does not check for extra uses and/or creates
978 // an extra instruction (an optional final cast is not included
979 // in the transform comments). We may also want to favor icmp over
980 // shifts in cases of equal instructions because icmp has better
981 // analysis in general (invert the transform).
982
983 const APInt *Op1CV;
984 if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
985
986 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
987 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
988 Value *In = Cmp->getOperand(0);
989 Value *Sh = ConstantInt::get(In->getType(),
990 In->getType()->getScalarSizeInBits() - 1);
991 In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
992 if (In->getType() != Zext.getType())
993 In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
994
995 return replaceInstUsesWith(Zext, In);
996 }
997
998 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
999 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1000 // zext (X != 0) to i32 --> X iff X has only the low bit set.
1001 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1002
1003 if (Op1CV->isZero() && Cmp->isEquality()) {
1004 // Exactly 1 possible 1? But not the high-bit because that is
1005 // canonicalized to this form.
1006 KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
1007 APInt KnownZeroMask(~Known.Zero);
1008 uint32_t ShAmt = KnownZeroMask.logBase2();
1009 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1010 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
1011 if (IsExpectShAmt &&
1012 (Cmp->getOperand(0)->getType() == Zext.getType() ||
1013 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
1014 Value *In = Cmp->getOperand(0);
1015 if (ShAmt) {
1016 // Perform a logical shr by shiftamt.
1017 // Insert the shift to put the result in the low bit.
1018 In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
1019 In->getName() + ".lobit");
1020 }
1021
1022 // Toggle the low bit for "X == 0".
1023 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1024 In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
1025
1026 if (Zext.getType() == In->getType())
1027 return replaceInstUsesWith(Zext, In);
1028
1029 Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1030 return replaceInstUsesWith(Zext, IntCast);
1031 }
1032 }
1033 }
1034
1035 if (Cmp->isEquality()) {
1036 // Test if a bit is clear/set using a shifted-one mask:
1037 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1038 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1039 Value *X, *ShAmt;
1040 if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
1041 match(Cmp->getOperand(0),
1042 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
1043 auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
1044 Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
1045 if (Zext.getType() == And->getType() ||
1046 Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
1047 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1048 X = Builder.CreateNot(X);
1049 Value *Lshr = Builder.CreateLShr(X, ShAmt);
1050 Value *And1 =
1051 Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1052 return replaceInstUsesWith(
1053 Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
1054 }
1055 }
1056 }
1057
1058 return nullptr;
1059 }
1060
1061 /// Determine if the specified value can be computed in the specified wider type
1062 /// and produce the same low bits. If not, return false.
1063 ///
1064 /// If this function returns true, it can also return a non-zero number of bits
1065 /// (in BitsToClear) which indicates that the value it computes is correct for
1066 /// the zero extend, but that the additional BitsToClear bits need to be zero'd
1067 /// out. For example, to promote something like:
1068 ///
1069 /// %B = trunc i64 %A to i32
1070 /// %C = lshr i32 %B, 8
1071 /// %E = zext i32 %C to i64
1072 ///
1073 /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1074 /// set to 8 to indicate that the promoted value needs to have bits 24-31
1075 /// cleared in addition to bits 32-63. Since an 'and' will be generated to
1076 /// clear the top bits anyway, doing this has no extra cost.
1077 ///
1078 /// This function works on both vectors and scalars.
canEvaluateZExtd(Value * V,Type * Ty,unsigned & BitsToClear,InstCombinerImpl & IC,Instruction * CxtI)1079 static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
1080 InstCombinerImpl &IC, Instruction *CxtI) {
1081 BitsToClear = 0;
1082 if (canAlwaysEvaluateInType(V, Ty))
1083 return true;
1084 if (canNotEvaluateInType(V, Ty))
1085 return false;
1086
1087 auto *I = cast<Instruction>(V);
1088 unsigned Tmp;
1089 switch (I->getOpcode()) {
1090 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1091 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1092 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1093 return true;
1094 case Instruction::And:
1095 case Instruction::Or:
1096 case Instruction::Xor:
1097 case Instruction::Add:
1098 case Instruction::Sub:
1099 case Instruction::Mul:
1100 if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1101 !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
1102 return false;
1103 // These can all be promoted if neither operand has 'bits to clear'.
1104 if (BitsToClear == 0 && Tmp == 0)
1105 return true;
1106
1107 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1108 // other side, BitsToClear is ok.
1109 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1110 // We use MaskedValueIsZero here for generality, but the case we care
1111 // about the most is constant RHS.
1112 unsigned VSize = V->getType()->getScalarSizeInBits();
1113 if (IC.MaskedValueIsZero(I->getOperand(1),
1114 APInt::getHighBitsSet(VSize, BitsToClear),
1115 CxtI)) {
1116 // If this is an And instruction and all of the BitsToClear are
1117 // known to be zero we can reset BitsToClear.
1118 if (I->getOpcode() == Instruction::And)
1119 BitsToClear = 0;
1120 return true;
1121 }
1122 }
1123
1124 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1125 return false;
1126
1127 case Instruction::Shl: {
1128 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1129 // upper bits we can reduce BitsToClear by the shift amount.
1130 const APInt *Amt;
1131 if (match(I->getOperand(1), m_APInt(Amt))) {
1132 if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1133 return false;
1134 uint64_t ShiftAmt = Amt->getZExtValue();
1135 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1136 return true;
1137 }
1138 return false;
1139 }
1140 case Instruction::LShr: {
1141 // We can promote lshr(x, cst) if we can promote x. This requires the
1142 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1143 const APInt *Amt;
1144 if (match(I->getOperand(1), m_APInt(Amt))) {
1145 if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1146 return false;
1147 BitsToClear += Amt->getZExtValue();
1148 if (BitsToClear > V->getType()->getScalarSizeInBits())
1149 BitsToClear = V->getType()->getScalarSizeInBits();
1150 return true;
1151 }
1152 // Cannot promote variable LSHR.
1153 return false;
1154 }
1155 case Instruction::Select:
1156 if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1157 !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1158 // TODO: If important, we could handle the case when the BitsToClear are
1159 // known zero in the disagreeing side.
1160 Tmp != BitsToClear)
1161 return false;
1162 return true;
1163
1164 case Instruction::PHI: {
1165 // We can change a phi if we can change all operands. Note that we never
1166 // get into trouble with cyclic PHIs here because we only consider
1167 // instructions with a single use.
1168 PHINode *PN = cast<PHINode>(I);
1169 if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
1170 return false;
1171 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1172 if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1173 // TODO: If important, we could handle the case when the BitsToClear
1174 // are known zero in the disagreeing input.
1175 Tmp != BitsToClear)
1176 return false;
1177 return true;
1178 }
1179 case Instruction::Call:
1180 // llvm.vscale() can always be executed in larger type, because the
1181 // value is automatically zero-extended.
1182 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1183 if (II->getIntrinsicID() == Intrinsic::vscale)
1184 return true;
1185 return false;
1186 default:
1187 // TODO: Can handle more cases here.
1188 return false;
1189 }
1190 }
1191
visitZExt(ZExtInst & Zext)1192 Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
1193 // If this zero extend is only used by a truncate, let the truncate be
1194 // eliminated before we try to optimize this zext.
1195 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1196 !isa<Constant>(Zext.getOperand(0)))
1197 return nullptr;
1198
1199 // If one of the common conversion will work, do it.
1200 if (Instruction *Result = commonCastTransforms(Zext))
1201 return Result;
1202
1203 Value *Src = Zext.getOperand(0);
1204 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1205
1206 // zext nneg bool x -> 0
1207 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1208 return replaceInstUsesWith(Zext, Constant::getNullValue(Zext.getType()));
1209
1210 // Try to extend the entire expression tree to the wide destination type.
1211 unsigned BitsToClear;
1212 if (shouldChangeType(SrcTy, DestTy) &&
1213 canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
1214 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1215 "Can't clear more bits than in SrcTy");
1216
1217 // Okay, we can transform this! Insert the new expression now.
1218 LLVM_DEBUG(
1219 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1220 " to avoid zero extend: "
1221 << Zext << '\n');
1222 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1223 assert(Res->getType() == DestTy);
1224
1225 // Preserve debug values referring to Src if the zext is its last use.
1226 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1227 if (SrcOp->hasOneUse())
1228 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1229
1230 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1231 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1232
1233 // If the high bits are already filled with zeros, just replace this
1234 // cast with the result.
1235 if (MaskedValueIsZero(
1236 Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
1237 &Zext))
1238 return replaceInstUsesWith(Zext, Res);
1239
1240 // We need to emit an AND to clear the high bits.
1241 Constant *C = ConstantInt::get(Res->getType(),
1242 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1243 return BinaryOperator::CreateAnd(Res, C);
1244 }
1245
1246 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1247 // types and if the sizes are just right we can convert this into a logical
1248 // 'and' which will be much cheaper than the pair of casts.
1249 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1250 // TODO: Subsume this into EvaluateInDifferentType.
1251
1252 // Get the sizes of the types involved. We know that the intermediate type
1253 // will be smaller than A or C, but don't know the relation between A and C.
1254 Value *A = CSrc->getOperand(0);
1255 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1256 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1257 unsigned DstSize = DestTy->getScalarSizeInBits();
1258 // If we're actually extending zero bits, then if
1259 // SrcSize < DstSize: zext(a & mask)
1260 // SrcSize == DstSize: a & mask
1261 // SrcSize > DstSize: trunc(a) & mask
1262 if (SrcSize < DstSize) {
1263 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1264 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1265 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1266 return new ZExtInst(And, DestTy);
1267 }
1268
1269 if (SrcSize == DstSize) {
1270 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1271 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1272 AndValue));
1273 }
1274 if (SrcSize > DstSize) {
1275 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1276 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1277 return BinaryOperator::CreateAnd(Trunc,
1278 ConstantInt::get(Trunc->getType(),
1279 AndValue));
1280 }
1281 }
1282
1283 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1284 return transformZExtICmp(Cmp, Zext);
1285
1286 // zext(trunc(X) & C) -> (X & zext(C)).
1287 Constant *C;
1288 Value *X;
1289 if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1290 X->getType() == DestTy)
1291 return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
1292
1293 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1294 Value *And;
1295 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1296 match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
1297 X->getType() == DestTy) {
1298 Value *ZC = Builder.CreateZExt(C, DestTy);
1299 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1300 }
1301
1302 // If we are truncating, masking, and then zexting back to the original type,
1303 // that's just a mask. This is not handled by canEvaluateZextd if the
1304 // intermediate values have extra uses. This could be generalized further for
1305 // a non-constant mask operand.
1306 // zext (and (trunc X), C) --> and X, (zext C)
1307 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1308 X->getType() == DestTy) {
1309 Value *ZextC = Builder.CreateZExt(C, DestTy);
1310 return BinaryOperator::CreateAnd(X, ZextC);
1311 }
1312
1313 if (match(Src, m_VScale())) {
1314 if (Zext.getFunction() &&
1315 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1316 Attribute Attr =
1317 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1318 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1319 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1320 if (Log2_32(*MaxVScale) < TypeWidth)
1321 return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
1322 }
1323 }
1324 }
1325
1326 if (!Zext.hasNonNeg()) {
1327 // If this zero extend is only used by a shift, add nneg flag.
1328 if (Zext.hasOneUse() &&
1329 SrcTy->getScalarSizeInBits() >
1330 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1331 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1332 Zext.setNonNeg();
1333 return &Zext;
1334 }
1335
1336 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1337 Zext.setNonNeg();
1338 return &Zext;
1339 }
1340 }
1341
1342 return nullptr;
1343 }
1344
1345 /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
transformSExtICmp(ICmpInst * Cmp,SExtInst & Sext)1346 Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1347 SExtInst &Sext) {
1348 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1349 ICmpInst::Predicate Pred = Cmp->getPredicate();
1350
1351 // Don't bother if Op1 isn't of vector or integer type.
1352 if (!Op1->getType()->isIntOrIntVectorTy())
1353 return nullptr;
1354
1355 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1356 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1357 Value *Sh = ConstantInt::get(Op0->getType(),
1358 Op0->getType()->getScalarSizeInBits() - 1);
1359 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1360 if (In->getType() != Sext.getType())
1361 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1362
1363 return replaceInstUsesWith(Sext, In);
1364 }
1365
1366 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1367 // If we know that only one bit of the LHS of the icmp can be set and we
1368 // have an equality comparison with zero or a power of 2, we can transform
1369 // the icmp and sext into bitwise/integer operations.
1370 if (Cmp->hasOneUse() &&
1371 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1372 KnownBits Known = computeKnownBits(Op0, &Sext);
1373
1374 APInt KnownZeroMask(~Known.Zero);
1375 if (KnownZeroMask.isPowerOf2()) {
1376 Value *In = Cmp->getOperand(0);
1377
1378 // If the icmp tests for a known zero bit we can constant fold it.
1379 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1380 Value *V = Pred == ICmpInst::ICMP_NE ?
1381 ConstantInt::getAllOnesValue(Sext.getType()) :
1382 ConstantInt::getNullValue(Sext.getType());
1383 return replaceInstUsesWith(Sext, V);
1384 }
1385
1386 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1387 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1388 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1389 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1390 // Perform a right shift to place the desired bit in the LSB.
1391 if (ShiftAmt)
1392 In = Builder.CreateLShr(In,
1393 ConstantInt::get(In->getType(), ShiftAmt));
1394
1395 // At this point "In" is either 1 or 0. Subtract 1 to turn
1396 // {1, 0} -> {0, -1}.
1397 In = Builder.CreateAdd(In,
1398 ConstantInt::getAllOnesValue(In->getType()),
1399 "sext");
1400 } else {
1401 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1402 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1403 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1404 // Perform a left shift to place the desired bit in the MSB.
1405 if (ShiftAmt)
1406 In = Builder.CreateShl(In,
1407 ConstantInt::get(In->getType(), ShiftAmt));
1408
1409 // Distribute the bit over the whole bit width.
1410 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1411 KnownZeroMask.getBitWidth() - 1), "sext");
1412 }
1413
1414 if (Sext.getType() == In->getType())
1415 return replaceInstUsesWith(Sext, In);
1416 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1417 }
1418 }
1419 }
1420
1421 return nullptr;
1422 }
1423
1424 /// Return true if we can take the specified value and return it as type Ty
1425 /// without inserting any new casts and without changing the value of the common
1426 /// low bits. This is used by code that tries to promote integer operations to
1427 /// a wider types will allow us to eliminate the extension.
1428 ///
1429 /// This function works on both vectors and scalars.
1430 ///
canEvaluateSExtd(Value * V,Type * Ty)1431 static bool canEvaluateSExtd(Value *V, Type *Ty) {
1432 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1433 "Can't sign extend type to a smaller type");
1434 if (canAlwaysEvaluateInType(V, Ty))
1435 return true;
1436 if (canNotEvaluateInType(V, Ty))
1437 return false;
1438
1439 auto *I = cast<Instruction>(V);
1440 switch (I->getOpcode()) {
1441 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1442 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1443 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1444 return true;
1445 case Instruction::And:
1446 case Instruction::Or:
1447 case Instruction::Xor:
1448 case Instruction::Add:
1449 case Instruction::Sub:
1450 case Instruction::Mul:
1451 // These operators can all arbitrarily be extended if their inputs can.
1452 return canEvaluateSExtd(I->getOperand(0), Ty) &&
1453 canEvaluateSExtd(I->getOperand(1), Ty);
1454
1455 //case Instruction::Shl: TODO
1456 //case Instruction::LShr: TODO
1457
1458 case Instruction::Select:
1459 return canEvaluateSExtd(I->getOperand(1), Ty) &&
1460 canEvaluateSExtd(I->getOperand(2), Ty);
1461
1462 case Instruction::PHI: {
1463 // We can change a phi if we can change all operands. Note that we never
1464 // get into trouble with cyclic PHIs here because we only consider
1465 // instructions with a single use.
1466 PHINode *PN = cast<PHINode>(I);
1467 for (Value *IncValue : PN->incoming_values())
1468 if (!canEvaluateSExtd(IncValue, Ty)) return false;
1469 return true;
1470 }
1471 default:
1472 // TODO: Can handle more cases here.
1473 break;
1474 }
1475
1476 return false;
1477 }
1478
visitSExt(SExtInst & Sext)1479 Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
1480 // If this sign extend is only used by a truncate, let the truncate be
1481 // eliminated before we try to optimize this sext.
1482 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1483 return nullptr;
1484
1485 if (Instruction *I = commonCastTransforms(Sext))
1486 return I;
1487
1488 Value *Src = Sext.getOperand(0);
1489 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1490 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1491 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1492
1493 // If the value being extended is zero or positive, use a zext instead.
1494 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1495 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1496 CI->setNonNeg(true);
1497 return CI;
1498 }
1499
1500 // Try to extend the entire expression tree to the wide destination type.
1501 if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1502 // Okay, we can transform this! Insert the new expression now.
1503 LLVM_DEBUG(
1504 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1505 " to avoid sign extend: "
1506 << Sext << '\n');
1507 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1508 assert(Res->getType() == DestTy);
1509
1510 // If the high bits are already filled with sign bit, just replace this
1511 // cast with the result.
1512 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1513 return replaceInstUsesWith(Sext, Res);
1514
1515 // We need to emit a shl + ashr to do the sign extend.
1516 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1517 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1518 ShAmt);
1519 }
1520
1521 Value *X;
1522 if (match(Src, m_Trunc(m_Value(X)))) {
1523 // If the input has more sign bits than bits truncated, then convert
1524 // directly to final type.
1525 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1526 if (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)
1527 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1528
1529 // If input is a trunc from the destination type, then convert into shifts.
1530 if (Src->hasOneUse() && X->getType() == DestTy) {
1531 // sext (trunc X) --> ashr (shl X, C), C
1532 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1533 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1534 }
1535
1536 // If we are replacing shifted-in high zero bits with sign bits, convert
1537 // the logic shift to arithmetic shift and eliminate the cast to
1538 // intermediate type:
1539 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1540 Value *Y;
1541 if (Src->hasOneUse() &&
1542 match(X, m_LShr(m_Value(Y),
1543 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1544 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1545 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1546 }
1547 }
1548
1549 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1550 return transformSExtICmp(Cmp, Sext);
1551
1552 // If the input is a shl/ashr pair of a same constant, then this is a sign
1553 // extension from a smaller value. If we could trust arbitrary bitwidth
1554 // integers, we could turn this into a truncate to the smaller bit and then
1555 // use a sext for the whole extension. Since we don't, look deeper and check
1556 // for a truncate. If the source and dest are the same type, eliminate the
1557 // trunc and extend and just do shifts. For example, turn:
1558 // %a = trunc i32 %i to i8
1559 // %b = shl i8 %a, C
1560 // %c = ashr i8 %b, C
1561 // %d = sext i8 %c to i32
1562 // into:
1563 // %a = shl i32 %i, 32-(8-C)
1564 // %d = ashr i32 %a, 32-(8-C)
1565 Value *A = nullptr;
1566 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1567 Constant *BA = nullptr, *CA = nullptr;
1568 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1569 m_ImmConstant(CA))) &&
1570 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1571 Constant *WideCurrShAmt =
1572 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1573 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1574 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1575 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1576 Constant *NewShAmt = ConstantExpr::getSub(
1577 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1578 NumLowbitsLeft);
1579 NewShAmt =
1580 Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1581 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1582 return BinaryOperator::CreateAShr(A, NewShAmt);
1583 }
1584
1585 // Splatting a bit of constant-index across a value:
1586 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1587 // If the dest type is different, use a cast (adjust use check).
1588 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1589 m_SpecificInt(SrcBitSize - 1))))) {
1590 Type *XTy = X->getType();
1591 unsigned XBitSize = XTy->getScalarSizeInBits();
1592 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1593 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1594 if (XTy == DestTy)
1595 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1596 AshrAmtC);
1597 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1598 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1599 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1600 }
1601 }
1602
1603 if (match(Src, m_VScale())) {
1604 if (Sext.getFunction() &&
1605 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1606 Attribute Attr =
1607 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1608 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1609 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
1610 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
1611 }
1612 }
1613
1614 return nullptr;
1615 }
1616
1617 /// Return a Constant* for the specified floating-point constant if it fits
1618 /// in the specified FP type without changing its value.
fitsInFPType(ConstantFP * CFP,const fltSemantics & Sem)1619 static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1620 bool losesInfo;
1621 APFloat F = CFP->getValueAPF();
1622 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1623 return !losesInfo;
1624 }
1625
shrinkFPConstant(ConstantFP * CFP,bool PreferBFloat)1626 static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1627 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1628 return nullptr; // No constant folding of this.
1629 // See if the value can be truncated to bfloat and then reextended.
1630 if (PreferBFloat && fitsInFPType(CFP, APFloat::BFloat()))
1631 return Type::getBFloatTy(CFP->getContext());
1632 // See if the value can be truncated to half and then reextended.
1633 if (!PreferBFloat && fitsInFPType(CFP, APFloat::IEEEhalf()))
1634 return Type::getHalfTy(CFP->getContext());
1635 // See if the value can be truncated to float and then reextended.
1636 if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1637 return Type::getFloatTy(CFP->getContext());
1638 if (CFP->getType()->isDoubleTy())
1639 return nullptr; // Won't shrink.
1640 if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1641 return Type::getDoubleTy(CFP->getContext());
1642 // Don't try to shrink to various long double types.
1643 return nullptr;
1644 }
1645
1646 // Determine if this is a vector of ConstantFPs and if so, return the minimal
1647 // type we can safely truncate all elements to.
shrinkFPConstantVector(Value * V,bool PreferBFloat)1648 static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
1649 auto *CV = dyn_cast<Constant>(V);
1650 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1651 if (!CV || !CVVTy)
1652 return nullptr;
1653
1654 Type *MinType = nullptr;
1655
1656 unsigned NumElts = CVVTy->getNumElements();
1657
1658 // For fixed-width vectors we find the minimal type by looking
1659 // through the constant values of the vector.
1660 for (unsigned i = 0; i != NumElts; ++i) {
1661 if (isa<UndefValue>(CV->getAggregateElement(i)))
1662 continue;
1663
1664 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1665 if (!CFP)
1666 return nullptr;
1667
1668 Type *T = shrinkFPConstant(CFP, PreferBFloat);
1669 if (!T)
1670 return nullptr;
1671
1672 // If we haven't found a type yet or this type has a larger mantissa than
1673 // our previous type, this is our new minimal type.
1674 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1675 MinType = T;
1676 }
1677
1678 // Make a vector type from the minimal type.
1679 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1680 }
1681
1682 /// Find the minimum FP type we can safely truncate to.
getMinimumFPType(Value * V,bool PreferBFloat)1683 static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
1684 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1685 return FPExt->getOperand(0)->getType();
1686
1687 // If this value is a constant, return the constant in the smallest FP type
1688 // that can accurately represent it. This allows us to turn
1689 // (float)((double)X+2.0) into x+2.0f.
1690 if (auto *CFP = dyn_cast<ConstantFP>(V))
1691 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1692 return T;
1693
1694 // Try to shrink scalable and fixed splat vectors.
1695 if (auto *FPC = dyn_cast<Constant>(V))
1696 if (isa<VectorType>(V->getType()))
1697 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
1698 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
1699 return T;
1700
1701 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1702 // vectors
1703 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1704 return T;
1705
1706 return V->getType();
1707 }
1708
1709 /// Return true if the cast from integer to FP can be proven to be exact for all
1710 /// possible inputs (the conversion does not lose any precision).
isKnownExactCastIntToFP(CastInst & I,InstCombinerImpl & IC)1711 static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1712 CastInst::CastOps Opcode = I.getOpcode();
1713 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1714 "Unexpected cast");
1715 Value *Src = I.getOperand(0);
1716 Type *SrcTy = Src->getType();
1717 Type *FPTy = I.getType();
1718 bool IsSigned = Opcode == Instruction::SIToFP;
1719 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1720
1721 // Easy case - if the source integer type has less bits than the FP mantissa,
1722 // then the cast must be exact.
1723 int DestNumSigBits = FPTy->getFPMantissaWidth();
1724 if (SrcSize <= DestNumSigBits)
1725 return true;
1726
1727 // Cast from FP to integer and back to FP is independent of the intermediate
1728 // integer width because of poison on overflow.
1729 Value *F;
1730 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1731 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1732 // potential rounding of negative FP input values.
1733 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1734 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1735 SrcNumSigBits++;
1736
1737 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1738 // significant bits than the destination (and make sure neither type is
1739 // weird -- ppc_fp128).
1740 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1741 SrcNumSigBits <= DestNumSigBits)
1742 return true;
1743 }
1744
1745 // TODO:
1746 // Try harder to find if the source integer type has less significant bits.
1747 // For example, compute number of sign bits.
1748 KnownBits SrcKnown = IC.computeKnownBits(Src, &I);
1749 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1750 SrcKnown.countMinLeadingZeros() -
1751 SrcKnown.countMinTrailingZeros();
1752 if (SigBits <= DestNumSigBits)
1753 return true;
1754
1755 return false;
1756 }
1757
visitFPTrunc(FPTruncInst & FPT)1758 Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1759 if (Instruction *I = commonCastTransforms(FPT))
1760 return I;
1761
1762 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1763 // simplify this expression to avoid one or more of the trunc/extend
1764 // operations if we can do so without changing the numerical results.
1765 //
1766 // The exact manner in which the widths of the operands interact to limit
1767 // what we can and cannot do safely varies from operation to operation, and
1768 // is explained below in the various case statements.
1769 Type *Ty = FPT.getType();
1770 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1771 if (BO && BO->hasOneUse()) {
1772 Type *LHSMinType =
1773 getMinimumFPType(BO->getOperand(0), /*PreferBFloat=*/Ty->isBFloatTy());
1774 Type *RHSMinType =
1775 getMinimumFPType(BO->getOperand(1), /*PreferBFloat=*/Ty->isBFloatTy());
1776 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1777 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1778 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1779 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1780 unsigned DstWidth = Ty->getFPMantissaWidth();
1781 switch (BO->getOpcode()) {
1782 default: break;
1783 case Instruction::FAdd:
1784 case Instruction::FSub:
1785 // For addition and subtraction, the infinitely precise result can
1786 // essentially be arbitrarily wide; proving that double rounding
1787 // will not occur because the result of OpI is exact (as we will for
1788 // FMul, for example) is hopeless. However, we *can* nonetheless
1789 // frequently know that double rounding cannot occur (or that it is
1790 // innocuous) by taking advantage of the specific structure of
1791 // infinitely-precise results that admit double rounding.
1792 //
1793 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
1794 // to represent both sources, we can guarantee that the double
1795 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1796 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1797 // for proof of this fact).
1798 //
1799 // Note: Figueroa does not consider the case where DstFormat !=
1800 // SrcFormat. It's possible (likely even!) that this analysis
1801 // could be tightened for those cases, but they are rare (the main
1802 // case of interest here is (float)((double)float + float)).
1803 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1804 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1805 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1806 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1807 RI->copyFastMathFlags(BO);
1808 return RI;
1809 }
1810 break;
1811 case Instruction::FMul:
1812 // For multiplication, the infinitely precise result has at most
1813 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1814 // that such a value can be exactly represented, then no double
1815 // rounding can possibly occur; we can safely perform the operation
1816 // in the destination format if it can represent both sources.
1817 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1818 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1819 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1820 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1821 }
1822 break;
1823 case Instruction::FDiv:
1824 // For division, we use again use the bound from Figueroa's
1825 // dissertation. I am entirely certain that this bound can be
1826 // tightened in the unbalanced operand case by an analysis based on
1827 // the diophantine rational approximation bound, but the well-known
1828 // condition used here is a good conservative first pass.
1829 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1830 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1831 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1832 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1833 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1834 }
1835 break;
1836 case Instruction::FRem: {
1837 // Remainder is straightforward. Remainder is always exact, so the
1838 // type of OpI doesn't enter into things at all. We simply evaluate
1839 // in whichever source type is larger, then convert to the
1840 // destination type.
1841 if (SrcWidth == OpWidth)
1842 break;
1843 Value *LHS, *RHS;
1844 if (LHSWidth == SrcWidth) {
1845 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1846 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1847 } else {
1848 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1849 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1850 }
1851
1852 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1853 return CastInst::CreateFPCast(ExactResult, Ty);
1854 }
1855 }
1856 }
1857
1858 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1859 Value *X;
1860 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1861 if (Op && Op->hasOneUse()) {
1862 FastMathFlags FMF = FPT.getFastMathFlags();
1863 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
1864 FMF &= FPMO->getFastMathFlags();
1865
1866 if (match(Op, m_FNeg(m_Value(X)))) {
1867 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
1868 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
1869 return replaceInstUsesWith(FPT, Neg);
1870 }
1871
1872 // If we are truncating a select that has an extended operand, we can
1873 // narrow the other operand and do the select as a narrow op.
1874 Value *Cond, *X, *Y;
1875 if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1876 X->getType() == Ty) {
1877 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
1878 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
1879 Value *Sel =
1880 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
1881 return replaceInstUsesWith(FPT, Sel);
1882 }
1883 if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1884 X->getType() == Ty) {
1885 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
1886 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
1887 Value *Sel =
1888 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
1889 return replaceInstUsesWith(FPT, Sel);
1890 }
1891 }
1892
1893 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1894 switch (II->getIntrinsicID()) {
1895 default: break;
1896 case Intrinsic::ceil:
1897 case Intrinsic::fabs:
1898 case Intrinsic::floor:
1899 case Intrinsic::nearbyint:
1900 case Intrinsic::rint:
1901 case Intrinsic::round:
1902 case Intrinsic::roundeven:
1903 case Intrinsic::trunc: {
1904 Value *Src = II->getArgOperand(0);
1905 if (!Src->hasOneUse())
1906 break;
1907
1908 // Except for fabs, this transformation requires the input of the unary FP
1909 // operation to be itself an fpext from the type to which we're
1910 // truncating.
1911 if (II->getIntrinsicID() != Intrinsic::fabs) {
1912 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1913 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1914 break;
1915 }
1916
1917 // Do unary FP operation on smaller type.
1918 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1919 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1920 Function *Overload = Intrinsic::getOrInsertDeclaration(
1921 FPT.getModule(), II->getIntrinsicID(), Ty);
1922 SmallVector<OperandBundleDef, 1> OpBundles;
1923 II->getOperandBundlesAsDefs(OpBundles);
1924 CallInst *NewCI =
1925 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1926 // A normal value may be converted to an infinity. It means that we cannot
1927 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
1928 NewCI->copyFastMathFlags(&FPT);
1929 return NewCI;
1930 }
1931 }
1932 }
1933
1934 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1935 return I;
1936
1937 Value *Src = FPT.getOperand(0);
1938 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1939 auto *FPCast = cast<CastInst>(Src);
1940 if (isKnownExactCastIntToFP(*FPCast, *this))
1941 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1942 }
1943
1944 return nullptr;
1945 }
1946
visitFPExt(CastInst & FPExt)1947 Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1948 // If the source operand is a cast from integer to FP and known exact, then
1949 // cast the integer operand directly to the destination type.
1950 Type *Ty = FPExt.getType();
1951 Value *Src = FPExt.getOperand(0);
1952 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1953 auto *FPCast = cast<CastInst>(Src);
1954 if (isKnownExactCastIntToFP(*FPCast, *this))
1955 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1956 }
1957
1958 return commonCastTransforms(FPExt);
1959 }
1960
1961 /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1962 /// This is safe if the intermediate type has enough bits in its mantissa to
1963 /// accurately represent all values of X. For example, this won't work with
1964 /// i64 -> float -> i64.
foldItoFPtoI(CastInst & FI)1965 Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1966 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1967 return nullptr;
1968
1969 auto *OpI = cast<CastInst>(FI.getOperand(0));
1970 Value *X = OpI->getOperand(0);
1971 Type *XType = X->getType();
1972 Type *DestType = FI.getType();
1973 bool IsOutputSigned = isa<FPToSIInst>(FI);
1974
1975 // Since we can assume the conversion won't overflow, our decision as to
1976 // whether the input will fit in the float should depend on the minimum
1977 // of the input range and output range.
1978
1979 // This means this is also safe for a signed input and unsigned output, since
1980 // a negative input would lead to undefined behavior.
1981 if (!isKnownExactCastIntToFP(*OpI, *this)) {
1982 // The first cast may not round exactly based on the source integer width
1983 // and FP width, but the overflow UB rules can still allow this to fold.
1984 // If the destination type is narrow, that means the intermediate FP value
1985 // must be large enough to hold the source value exactly.
1986 // For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
1987 int OutputSize = (int)DestType->getScalarSizeInBits();
1988 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1989 return nullptr;
1990 }
1991
1992 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1993 bool IsInputSigned = isa<SIToFPInst>(OpI);
1994 if (IsInputSigned && IsOutputSigned)
1995 return new SExtInst(X, DestType);
1996 return new ZExtInst(X, DestType);
1997 }
1998 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1999 return new TruncInst(X, DestType);
2000
2001 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2002 return replaceInstUsesWith(FI, X);
2003 }
2004
foldFPtoI(Instruction & FI,InstCombiner & IC)2005 static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
2006 // fpto{u/s}i non-norm --> 0
2007 FPClassTest Mask =
2008 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2009 KnownFPClass FPClass = computeKnownFPClass(
2010 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2011 if (FPClass.isKnownNever(Mask))
2012 return IC.replaceInstUsesWith(FI, ConstantInt::getNullValue(FI.getType()));
2013
2014 return nullptr;
2015 }
2016
visitFPToUI(FPToUIInst & FI)2017 Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
2018 if (Instruction *I = foldItoFPtoI(FI))
2019 return I;
2020
2021 if (Instruction *I = foldFPtoI(FI, *this))
2022 return I;
2023
2024 return commonCastTransforms(FI);
2025 }
2026
visitFPToSI(FPToSIInst & FI)2027 Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2028 if (Instruction *I = foldItoFPtoI(FI))
2029 return I;
2030
2031 if (Instruction *I = foldFPtoI(FI, *this))
2032 return I;
2033
2034 return commonCastTransforms(FI);
2035 }
2036
visitUIToFP(CastInst & CI)2037 Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2038 if (Instruction *R = commonCastTransforms(CI))
2039 return R;
2040 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2041 CI.setNonNeg();
2042 return &CI;
2043 }
2044 return nullptr;
2045 }
2046
visitSIToFP(CastInst & CI)2047 Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2048 if (Instruction *R = commonCastTransforms(CI))
2049 return R;
2050 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2051 auto *UI =
2052 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2053 UI->setNonNeg(true);
2054 return UI;
2055 }
2056 return nullptr;
2057 }
2058
visitIntToPtr(IntToPtrInst & CI)2059 Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2060 // If the source integer type is not the intptr_t type for this target, do a
2061 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2062 // cast to be exposed to other transforms.
2063 unsigned AS = CI.getAddressSpace();
2064 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2065 DL.getPointerSizeInBits(AS)) {
2066 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2067 DL.getIntPtrType(CI.getContext(), AS));
2068 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2069 return new IntToPtrInst(P, CI.getType());
2070 }
2071
2072 if (Instruction *I = commonCastTransforms(CI))
2073 return I;
2074
2075 return nullptr;
2076 }
2077
foldPtrToIntOfGEP(Type * IntTy,Value * Ptr)2078 Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
2079 // Look through chain of one-use GEPs.
2080 Type *PtrTy = Ptr->getType();
2081 SmallVector<GEPOperator *> GEPs;
2082 while (true) {
2083 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2084 if (!GEP || !GEP->hasOneUse())
2085 break;
2086 GEPs.push_back(GEP);
2087 Ptr = GEP->getPointerOperand();
2088 }
2089
2090 // Don't handle case where GEP converts from pointer to vector.
2091 if (GEPs.empty() || PtrTy != Ptr->getType())
2092 return nullptr;
2093
2094 // Check whether we know the integer value of the base pointer.
2095 Value *Res;
2096 Type *IdxTy = DL.getIndexType(PtrTy);
2097 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2098 Res->getType() == IntTy && IntTy == IdxTy) {
2099 // pass
2100 } else if (isa<ConstantPointerNull>(Ptr)) {
2101 Res = Constant::getNullValue(IdxTy);
2102 } else {
2103 return nullptr;
2104 }
2105
2106 // Perform the entire operation on integers instead.
2107 for (GEPOperator *GEP : reverse(GEPs)) {
2108 Value *Offset = EmitGEPOffset(GEP);
2109 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2110 }
2111 return Builder.CreateZExtOrTrunc(Res, IntTy);
2112 }
2113
visitPtrToInt(PtrToIntInst & CI)2114 Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2115 // If the destination integer type is not the intptr_t type for this target,
2116 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2117 // to be exposed to other transforms.
2118 Value *SrcOp = CI.getPointerOperand();
2119 Type *SrcTy = SrcOp->getType();
2120 Type *Ty = CI.getType();
2121 unsigned AS = CI.getPointerAddressSpace();
2122 unsigned TySize = Ty->getScalarSizeInBits();
2123 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2124 if (TySize != PtrSize) {
2125 Type *IntPtrTy =
2126 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2127 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2128 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2129 }
2130
2131 // (ptrtoint (ptrmask P, M))
2132 // -> (and (ptrtoint P), M)
2133 // This is generally beneficial as `and` is better supported than `ptrmask`.
2134 Value *Ptr, *Mask;
2135 if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
2136 m_Value(Mask)))) &&
2137 Mask->getType() == Ty)
2138 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2139
2140 if (Value *V = foldPtrToIntOfGEP(Ty, SrcOp))
2141 return replaceInstUsesWith(CI, V);
2142
2143 Value *Vec, *Scalar, *Index;
2144 if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
2145 m_Value(Scalar), m_Value(Index)))) &&
2146 Vec->getType() == Ty) {
2147 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2148 // Convert the scalar to int followed by insert to eliminate one cast:
2149 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2150 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2151 return InsertElementInst::Create(Vec, NewCast, Index);
2152 }
2153
2154 return commonCastTransforms(CI);
2155 }
2156
2157 /// This input value (which is known to have vector type) is being zero extended
2158 /// or truncated to the specified vector type. Since the zext/trunc is done
2159 /// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2160 /// endianness will impact which end of the vector that is extended or
2161 /// truncated.
2162 ///
2163 /// A vector is always stored with index 0 at the lowest address, which
2164 /// corresponds to the most significant bits for a big endian stored integer and
2165 /// the least significant bits for little endian. A trunc/zext of an integer
2166 /// impacts the big end of the integer. Thus, we need to add/remove elements at
2167 /// the front of the vector for big endian targets, and the back of the vector
2168 /// for little endian targets.
2169 ///
2170 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2171 ///
2172 /// The source and destination vector types may have different element types.
2173 static Instruction *
optimizeVectorResizeWithIntegerBitCasts(Value * InVal,VectorType * DestTy,InstCombinerImpl & IC)2174 optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2175 InstCombinerImpl &IC) {
2176 // We can only do this optimization if the output is a multiple of the input
2177 // element size, or the input is a multiple of the output element size.
2178 // Convert the input type to have the same element type as the output.
2179 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2180
2181 if (SrcTy->getElementType() != DestTy->getElementType()) {
2182 // The input types don't need to be identical, but for now they must be the
2183 // same size. There is no specific reason we couldn't handle things like
2184 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2185 // there yet.
2186 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2187 DestTy->getElementType()->getPrimitiveSizeInBits())
2188 return nullptr;
2189
2190 SrcTy =
2191 FixedVectorType::get(DestTy->getElementType(),
2192 cast<FixedVectorType>(SrcTy)->getNumElements());
2193 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2194 }
2195
2196 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2197 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2198 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2199
2200 assert(SrcElts != DestElts && "Element counts should be different.");
2201
2202 // Now that the element types match, get the shuffle mask and RHS of the
2203 // shuffle to use, which depends on whether we're increasing or decreasing the
2204 // size of the input.
2205 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2206 ArrayRef<int> ShuffleMask;
2207 Value *V2;
2208
2209 if (SrcElts > DestElts) {
2210 // If we're shrinking the number of elements (rewriting an integer
2211 // truncate), just shuffle in the elements corresponding to the least
2212 // significant bits from the input and use poison as the second shuffle
2213 // input.
2214 V2 = PoisonValue::get(SrcTy);
2215 // Make sure the shuffle mask selects the "least significant bits" by
2216 // keeping elements from back of the src vector for big endian, and from the
2217 // front for little endian.
2218 ShuffleMask = ShuffleMaskStorage;
2219 if (IsBigEndian)
2220 ShuffleMask = ShuffleMask.take_back(DestElts);
2221 else
2222 ShuffleMask = ShuffleMask.take_front(DestElts);
2223 } else {
2224 // If we're increasing the number of elements (rewriting an integer zext),
2225 // shuffle in all of the elements from InVal. Fill the rest of the result
2226 // elements with zeros from a constant zero.
2227 V2 = Constant::getNullValue(SrcTy);
2228 // Use first elt from V2 when indicating zero in the shuffle mask.
2229 uint32_t NullElt = SrcElts;
2230 // Extend with null values in the "most significant bits" by adding elements
2231 // in front of the src vector for big endian, and at the back for little
2232 // endian.
2233 unsigned DeltaElts = DestElts - SrcElts;
2234 if (IsBigEndian)
2235 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2236 else
2237 ShuffleMaskStorage.append(DeltaElts, NullElt);
2238 ShuffleMask = ShuffleMaskStorage;
2239 }
2240
2241 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2242 }
2243
isMultipleOfTypeSize(unsigned Value,Type * Ty)2244 static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2245 return Value % Ty->getPrimitiveSizeInBits() == 0;
2246 }
2247
getTypeSizeIndex(unsigned Value,Type * Ty)2248 static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2249 return Value / Ty->getPrimitiveSizeInBits();
2250 }
2251
2252 /// V is a value which is inserted into a vector of VecEltTy.
2253 /// Look through the value to see if we can decompose it into
2254 /// insertions into the vector. See the example in the comment for
2255 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
2256 /// The type of V is always a non-zero multiple of VecEltTy's size.
2257 /// Shift is the number of bits between the lsb of V and the lsb of
2258 /// the vector.
2259 ///
2260 /// This returns false if the pattern can't be matched or true if it can,
2261 /// filling in Elements with the elements found here.
collectInsertionElements(Value * V,unsigned Shift,SmallVectorImpl<Value * > & Elements,Type * VecEltTy,bool isBigEndian)2262 static bool collectInsertionElements(Value *V, unsigned Shift,
2263 SmallVectorImpl<Value *> &Elements,
2264 Type *VecEltTy, bool isBigEndian) {
2265 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2266 "Shift should be a multiple of the element type size");
2267
2268 // Undef values never contribute useful bits to the result.
2269 if (isa<UndefValue>(V)) return true;
2270
2271 // If we got down to a value of the right type, we win, try inserting into the
2272 // right element.
2273 if (V->getType() == VecEltTy) {
2274 // Inserting null doesn't actually insert any elements.
2275 if (Constant *C = dyn_cast<Constant>(V))
2276 if (C->isNullValue())
2277 return true;
2278
2279 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2280 if (isBigEndian)
2281 ElementIndex = Elements.size() - ElementIndex - 1;
2282
2283 // Fail if multiple elements are inserted into this slot.
2284 if (Elements[ElementIndex])
2285 return false;
2286
2287 Elements[ElementIndex] = V;
2288 return true;
2289 }
2290
2291 if (Constant *C = dyn_cast<Constant>(V)) {
2292 // Figure out the # elements this provides, and bitcast it or slice it up
2293 // as required.
2294 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2295 VecEltTy);
2296 // If the constant is the size of a vector element, we just need to bitcast
2297 // it to the right type so it gets properly inserted.
2298 if (NumElts == 1)
2299 return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2300 Shift, Elements, VecEltTy, isBigEndian);
2301
2302 // Okay, this is a constant that covers multiple elements. Slice it up into
2303 // pieces and insert each element-sized piece into the vector.
2304 if (!isa<IntegerType>(C->getType()))
2305 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2306 C->getType()->getPrimitiveSizeInBits()));
2307 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2308 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2309
2310 for (unsigned i = 0; i != NumElts; ++i) {
2311 unsigned ShiftI = i * ElementSize;
2312 Constant *Piece = ConstantFoldBinaryInstruction(
2313 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2314 if (!Piece)
2315 return false;
2316
2317 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2318 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2319 isBigEndian))
2320 return false;
2321 }
2322 return true;
2323 }
2324
2325 if (!V->hasOneUse()) return false;
2326
2327 Instruction *I = dyn_cast<Instruction>(V);
2328 if (!I) return false;
2329 switch (I->getOpcode()) {
2330 default: return false; // Unhandled case.
2331 case Instruction::BitCast:
2332 if (I->getOperand(0)->getType()->isVectorTy())
2333 return false;
2334 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2335 isBigEndian);
2336 case Instruction::ZExt:
2337 if (!isMultipleOfTypeSize(
2338 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2339 VecEltTy))
2340 return false;
2341 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2342 isBigEndian);
2343 case Instruction::Or:
2344 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2345 isBigEndian) &&
2346 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2347 isBigEndian);
2348 case Instruction::Shl: {
2349 // Must be shifting by a constant that is a multiple of the element size.
2350 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2351 if (!CI) return false;
2352 Shift += CI->getZExtValue();
2353 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2354 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2355 isBigEndian);
2356 }
2357
2358 }
2359 }
2360
2361
2362 /// If the input is an 'or' instruction, we may be doing shifts and ors to
2363 /// assemble the elements of the vector manually.
2364 /// Try to rip the code out and replace it with insertelements. This is to
2365 /// optimize code like this:
2366 ///
2367 /// %tmp37 = bitcast float %inc to i32
2368 /// %tmp38 = zext i32 %tmp37 to i64
2369 /// %tmp31 = bitcast float %inc5 to i32
2370 /// %tmp32 = zext i32 %tmp31 to i64
2371 /// %tmp33 = shl i64 %tmp32, 32
2372 /// %ins35 = or i64 %tmp33, %tmp38
2373 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
2374 ///
2375 /// Into two insertelements that do "buildvector{%inc, %inc5}".
optimizeIntegerToVectorInsertions(BitCastInst & CI,InstCombinerImpl & IC)2376 static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2377 InstCombinerImpl &IC) {
2378 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2379 Value *IntInput = CI.getOperand(0);
2380
2381 // if the int input is just an undef value do not try to optimize to vector
2382 // insertions as it will prevent undef propagation
2383 if (isa<UndefValue>(IntInput))
2384 return nullptr;
2385
2386 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2387 if (!collectInsertionElements(IntInput, 0, Elements,
2388 DestVecTy->getElementType(),
2389 IC.getDataLayout().isBigEndian()))
2390 return nullptr;
2391
2392 // If we succeeded, we know that all of the element are specified by Elements
2393 // or are zero if Elements has a null entry. Recast this as a set of
2394 // insertions.
2395 Value *Result = Constant::getNullValue(CI.getType());
2396 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2397 if (!Elements[i]) continue; // Unset element.
2398
2399 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2400 IC.Builder.getInt32(i));
2401 }
2402
2403 return Result;
2404 }
2405
2406 /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2407 /// vector followed by extract element. The backend tends to handle bitcasts of
2408 /// vectors better than bitcasts of scalars because vector registers are
2409 /// usually not type-specific like scalar integer or scalar floating-point.
canonicalizeBitCastExtElt(BitCastInst & BitCast,InstCombinerImpl & IC)2410 static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2411 InstCombinerImpl &IC) {
2412 Value *VecOp, *Index;
2413 if (!match(BitCast.getOperand(0),
2414 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2415 return nullptr;
2416
2417 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2418 // type to extract from.
2419 Type *DestType = BitCast.getType();
2420 VectorType *VecType = cast<VectorType>(VecOp->getType());
2421 if (VectorType::isValidElementType(DestType)) {
2422 auto *NewVecType = VectorType::get(DestType, VecType);
2423 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2424 return ExtractElementInst::Create(NewBC, Index);
2425 }
2426
2427 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2428 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2429 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2430 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2431 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2432
2433 return nullptr;
2434 }
2435
2436 /// Change the type of a bitwise logic operation if we can eliminate a bitcast.
foldBitCastBitwiseLogic(BitCastInst & BitCast,InstCombiner::BuilderTy & Builder)2437 static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2438 InstCombiner::BuilderTy &Builder) {
2439 Type *DestTy = BitCast.getType();
2440 BinaryOperator *BO;
2441
2442 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2443 !BO->isBitwiseLogicOp())
2444 return nullptr;
2445
2446 // FIXME: This transform is restricted to vector types to avoid backend
2447 // problems caused by creating potentially illegal operations. If a fix-up is
2448 // added to handle that situation, we can remove this check.
2449 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2450 return nullptr;
2451
2452 if (DestTy->isFPOrFPVectorTy()) {
2453 Value *X, *Y;
2454 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2455 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2456 match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2457 if (X->getType()->isFPOrFPVectorTy() &&
2458 Y->getType()->isIntOrIntVectorTy()) {
2459 Value *CastedOp =
2460 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2461 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2462 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2463 }
2464 if (X->getType()->isIntOrIntVectorTy() &&
2465 Y->getType()->isFPOrFPVectorTy()) {
2466 Value *CastedOp =
2467 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2468 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2469 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2470 }
2471 }
2472 return nullptr;
2473 }
2474
2475 if (!DestTy->isIntOrIntVectorTy())
2476 return nullptr;
2477
2478 Value *X;
2479 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2480 X->getType() == DestTy && !isa<Constant>(X)) {
2481 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2482 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2483 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2484 }
2485
2486 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2487 X->getType() == DestTy && !isa<Constant>(X)) {
2488 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2489 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2490 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2491 }
2492
2493 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2494 // constant. This eases recognition of special constants for later ops.
2495 // Example:
2496 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2497 Constant *C;
2498 if (match(BO->getOperand(1), m_Constant(C))) {
2499 // bitcast (logic X, C) --> logic (bitcast X, C')
2500 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2501 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2502 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2503 }
2504
2505 return nullptr;
2506 }
2507
2508 /// Change the type of a select if we can eliminate a bitcast.
foldBitCastSelect(BitCastInst & BitCast,InstCombiner::BuilderTy & Builder)2509 static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2510 InstCombiner::BuilderTy &Builder) {
2511 Value *Cond, *TVal, *FVal;
2512 if (!match(BitCast.getOperand(0),
2513 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2514 return nullptr;
2515
2516 // A vector select must maintain the same number of elements in its operands.
2517 Type *CondTy = Cond->getType();
2518 Type *DestTy = BitCast.getType();
2519 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2520 if (!DestTy->isVectorTy() ||
2521 CondVTy->getElementCount() !=
2522 cast<VectorType>(DestTy)->getElementCount())
2523 return nullptr;
2524
2525 // FIXME: This transform is restricted from changing the select between
2526 // scalars and vectors to avoid backend problems caused by creating
2527 // potentially illegal operations. If a fix-up is added to handle that
2528 // situation, we can remove this check.
2529 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2530 return nullptr;
2531
2532 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2533 Value *X;
2534 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2535 !isa<Constant>(X)) {
2536 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2537 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2538 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2539 }
2540
2541 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2542 !isa<Constant>(X)) {
2543 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2544 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2545 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2546 }
2547
2548 return nullptr;
2549 }
2550
2551 /// Check if all users of CI are StoreInsts.
hasStoreUsersOnly(CastInst & CI)2552 static bool hasStoreUsersOnly(CastInst &CI) {
2553 for (User *U : CI.users()) {
2554 if (!isa<StoreInst>(U))
2555 return false;
2556 }
2557 return true;
2558 }
2559
2560 /// This function handles following case
2561 ///
2562 /// A -> B cast
2563 /// PHI
2564 /// B -> A cast
2565 ///
2566 /// All the related PHI nodes can be replaced by new PHI nodes with type A.
2567 /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
optimizeBitCastFromPhi(CastInst & CI,PHINode * PN)2568 Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2569 PHINode *PN) {
2570 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2571 if (hasStoreUsersOnly(CI))
2572 return nullptr;
2573
2574 Value *Src = CI.getOperand(0);
2575 Type *SrcTy = Src->getType(); // Type B
2576 Type *DestTy = CI.getType(); // Type A
2577
2578 SmallVector<PHINode *, 4> PhiWorklist;
2579 SmallSetVector<PHINode *, 4> OldPhiNodes;
2580
2581 // Find all of the A->B casts and PHI nodes.
2582 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2583 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2584 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2585 PhiWorklist.push_back(PN);
2586 OldPhiNodes.insert(PN);
2587 while (!PhiWorklist.empty()) {
2588 auto *OldPN = PhiWorklist.pop_back_val();
2589 for (Value *IncValue : OldPN->incoming_values()) {
2590 if (isa<Constant>(IncValue))
2591 continue;
2592
2593 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2594 // If there is a sequence of one or more load instructions, each loaded
2595 // value is used as address of later load instruction, bitcast is
2596 // necessary to change the value type, don't optimize it. For
2597 // simplicity we give up if the load address comes from another load.
2598 Value *Addr = LI->getOperand(0);
2599 if (Addr == &CI || isa<LoadInst>(Addr))
2600 return nullptr;
2601 // Don't tranform "load <256 x i32>, <256 x i32>*" to
2602 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2603 // TODO: Remove this check when bitcast between vector and x86_amx
2604 // is replaced with a specific intrinsic.
2605 if (DestTy->isX86_AMXTy())
2606 return nullptr;
2607 if (LI->hasOneUse() && LI->isSimple())
2608 continue;
2609 // If a LoadInst has more than one use, changing the type of loaded
2610 // value may create another bitcast.
2611 return nullptr;
2612 }
2613
2614 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2615 if (OldPhiNodes.insert(PNode))
2616 PhiWorklist.push_back(PNode);
2617 continue;
2618 }
2619
2620 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2621 // We can't handle other instructions.
2622 if (!BCI)
2623 return nullptr;
2624
2625 // Verify it's a A->B cast.
2626 Type *TyA = BCI->getOperand(0)->getType();
2627 Type *TyB = BCI->getType();
2628 if (TyA != DestTy || TyB != SrcTy)
2629 return nullptr;
2630 }
2631 }
2632
2633 // Check that each user of each old PHI node is something that we can
2634 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2635 for (auto *OldPN : OldPhiNodes) {
2636 for (User *V : OldPN->users()) {
2637 if (auto *SI = dyn_cast<StoreInst>(V)) {
2638 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2639 return nullptr;
2640 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2641 // Verify it's a B->A cast.
2642 Type *TyB = BCI->getOperand(0)->getType();
2643 Type *TyA = BCI->getType();
2644 if (TyA != DestTy || TyB != SrcTy)
2645 return nullptr;
2646 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2647 // As long as the user is another old PHI node, then even if we don't
2648 // rewrite it, the PHI web we're considering won't have any users
2649 // outside itself, so it'll be dead.
2650 if (!OldPhiNodes.contains(PHI))
2651 return nullptr;
2652 } else {
2653 return nullptr;
2654 }
2655 }
2656 }
2657
2658 // For each old PHI node, create a corresponding new PHI node with a type A.
2659 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
2660 for (auto *OldPN : OldPhiNodes) {
2661 Builder.SetInsertPoint(OldPN);
2662 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2663 NewPNodes[OldPN] = NewPN;
2664 }
2665
2666 // Fill in the operands of new PHI nodes.
2667 for (auto *OldPN : OldPhiNodes) {
2668 PHINode *NewPN = NewPNodes[OldPN];
2669 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2670 Value *V = OldPN->getOperand(j);
2671 Value *NewV = nullptr;
2672 if (auto *C = dyn_cast<Constant>(V)) {
2673 NewV = ConstantExpr::getBitCast(C, DestTy);
2674 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2675 // Explicitly perform load combine to make sure no opposing transform
2676 // can remove the bitcast in the meantime and trigger an infinite loop.
2677 Builder.SetInsertPoint(LI);
2678 NewV = combineLoadToNewType(*LI, DestTy);
2679 // Remove the old load and its use in the old phi, which itself becomes
2680 // dead once the whole transform finishes.
2681 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2682 eraseInstFromFunction(*LI);
2683 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2684 NewV = BCI->getOperand(0);
2685 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2686 NewV = NewPNodes[PrevPN];
2687 }
2688 assert(NewV);
2689 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2690 }
2691 }
2692
2693 // Traverse all accumulated PHI nodes and process its users,
2694 // which are Stores and BitcCasts. Without this processing
2695 // NewPHI nodes could be replicated and could lead to extra
2696 // moves generated after DeSSA.
2697 // If there is a store with type B, change it to type A.
2698
2699
2700 // Replace users of BitCast B->A with NewPHI. These will help
2701 // later to get rid off a closure formed by OldPHI nodes.
2702 Instruction *RetVal = nullptr;
2703 for (auto *OldPN : OldPhiNodes) {
2704 PHINode *NewPN = NewPNodes[OldPN];
2705 for (User *V : make_early_inc_range(OldPN->users())) {
2706 if (auto *SI = dyn_cast<StoreInst>(V)) {
2707 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2708 Builder.SetInsertPoint(SI);
2709 auto *NewBC =
2710 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2711 SI->setOperand(0, NewBC);
2712 Worklist.push(SI);
2713 assert(hasStoreUsersOnly(*NewBC));
2714 }
2715 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2716 Type *TyB = BCI->getOperand(0)->getType();
2717 Type *TyA = BCI->getType();
2718 assert(TyA == DestTy && TyB == SrcTy);
2719 (void) TyA;
2720 (void) TyB;
2721 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2722 if (BCI == &CI)
2723 RetVal = I;
2724 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2725 assert(OldPhiNodes.contains(PHI));
2726 (void) PHI;
2727 } else {
2728 llvm_unreachable("all uses should be handled");
2729 }
2730 }
2731 }
2732
2733 return RetVal;
2734 }
2735
2736 /// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
2737 /// copysign((bitcast Y to fp), X)
foldCopySignIdioms(BitCastInst & CI,InstCombiner::BuilderTy & Builder,const SimplifyQuery & SQ)2738 static Value *foldCopySignIdioms(BitCastInst &CI,
2739 InstCombiner::BuilderTy &Builder,
2740 const SimplifyQuery &SQ) {
2741 Value *X, *Y;
2742 Type *FTy = CI.getType();
2743 if (!FTy->isFPOrFPVectorTy())
2744 return nullptr;
2745 if (!match(&CI, m_ElementWiseBitCast(m_c_Or(
2746 m_And(m_ElementWiseBitCast(m_Value(X)), m_SignMask()),
2747 m_Value(Y)))))
2748 return nullptr;
2749 if (X->getType() != FTy)
2750 return nullptr;
2751 if (!isKnownNonNegative(Y, SQ))
2752 return nullptr;
2753
2754 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
2755 }
2756
visitBitCast(BitCastInst & CI)2757 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2758 // If the operands are integer typed then apply the integer transforms,
2759 // otherwise just apply the common ones.
2760 Value *Src = CI.getOperand(0);
2761 Type *SrcTy = Src->getType();
2762 Type *DestTy = CI.getType();
2763
2764 // Get rid of casts from one type to the same type. These are useless and can
2765 // be replaced by the operand.
2766 if (DestTy == Src->getType())
2767 return replaceInstUsesWith(CI, Src);
2768
2769 if (isa<FixedVectorType>(DestTy)) {
2770 if (isa<IntegerType>(SrcTy)) {
2771 // If this is a cast from an integer to vector, check to see if the input
2772 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2773 // the casts with a shuffle and (potentially) a bitcast.
2774 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2775 CastInst *SrcCast = cast<CastInst>(Src);
2776 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2777 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2778 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2779 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2780 return I;
2781 }
2782
2783 // If the input is an 'or' instruction, we may be doing shifts and ors to
2784 // assemble the elements of the vector manually. Try to rip the code out
2785 // and replace it with insertelements.
2786 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2787 return replaceInstUsesWith(CI, V);
2788 }
2789 }
2790
2791 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2792 if (SrcVTy->getNumElements() == 1) {
2793 // If our destination is not a vector, then make this a straight
2794 // scalar-scalar cast.
2795 if (!DestTy->isVectorTy()) {
2796 Value *Elem =
2797 Builder.CreateExtractElement(Src,
2798 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2799 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2800 }
2801
2802 // Otherwise, see if our source is an insert. If so, then use the scalar
2803 // component directly:
2804 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2805 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2806 return new BitCastInst(InsElt->getOperand(1), DestTy);
2807 }
2808
2809 // Convert an artificial vector insert into more analyzable bitwise logic.
2810 unsigned BitWidth = DestTy->getScalarSizeInBits();
2811 Value *X, *Y;
2812 uint64_t IndexC;
2813 if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2814 m_Value(Y), m_ConstantInt(IndexC)))) &&
2815 DestTy->isIntegerTy() && X->getType() == DestTy &&
2816 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2817 // Adjust for big endian - the LSBs are at the high index.
2818 if (DL.isBigEndian())
2819 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2820
2821 // We only handle (endian-normalized) insert to index 0. Any other insert
2822 // would require a left-shift, so that is an extra instruction.
2823 if (IndexC == 0) {
2824 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2825 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2826 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2827 Value *AndX = Builder.CreateAnd(X, MaskC);
2828 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2829 return BinaryOperator::CreateOr(AndX, ZextY);
2830 }
2831 }
2832 }
2833
2834 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2835 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2836 // a bitcast to a vector with the same # elts.
2837 Value *ShufOp0 = Shuf->getOperand(0);
2838 Value *ShufOp1 = Shuf->getOperand(1);
2839 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2840 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2841 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2842 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2843 ShufElts == SrcVecElts) {
2844 BitCastInst *Tmp;
2845 // If either of the operands is a cast from CI.getType(), then
2846 // evaluating the shuffle in the casted destination's type will allow
2847 // us to eliminate at least one cast.
2848 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2849 Tmp->getOperand(0)->getType() == DestTy) ||
2850 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2851 Tmp->getOperand(0)->getType() == DestTy)) {
2852 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2853 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2854 // Return a new shuffle vector. Use the same element ID's, as we
2855 // know the vector types match #elts.
2856 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2857 }
2858 }
2859
2860 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2861 // as a byte/bit swap:
2862 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2863 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2864 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2865 Shuf->hasOneUse() && Shuf->isReverse()) {
2866 unsigned IntrinsicNum = 0;
2867 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2868 SrcTy->getScalarSizeInBits() == 8) {
2869 IntrinsicNum = Intrinsic::bswap;
2870 } else if (SrcTy->getScalarSizeInBits() == 1) {
2871 IntrinsicNum = Intrinsic::bitreverse;
2872 }
2873 if (IntrinsicNum != 0) {
2874 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2875 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2876 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
2877 CI.getModule(), IntrinsicNum, DestTy);
2878 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2879 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2880 }
2881 }
2882 }
2883
2884 // Handle the A->B->A cast, and there is an intervening PHI node.
2885 if (PHINode *PN = dyn_cast<PHINode>(Src))
2886 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2887 return I;
2888
2889 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2890 return I;
2891
2892 if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2893 return I;
2894
2895 if (Instruction *I = foldBitCastSelect(CI, Builder))
2896 return I;
2897
2898 if (Value *V = foldCopySignIdioms(CI, Builder, SQ.getWithInstruction(&CI)))
2899 return replaceInstUsesWith(CI, V);
2900
2901 return commonCastTransforms(CI);
2902 }
2903
visitAddrSpaceCast(AddrSpaceCastInst & CI)2904 Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2905 return commonCastTransforms(CI);
2906 }
2907