xref: /freebsd/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision 78cd75393ec79565c63927bf200f06f839a1dc05)
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/BinaryFormat/Dwarf.h"
19 #include "llvm/IR/AttributeMask.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstVisitor.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/IntrinsicsAArch64.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/IntrinsicsNVPTX.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/IR/IntrinsicsWebAssembly.h"
35 #include "llvm/IR/IntrinsicsX86.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Verifier.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/Regex.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include <cstring>
45 
46 using namespace llvm;
47 
48 static cl::opt<bool>
49     DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
50                                 cl::desc("Disable autoupgrade of debug info"));
51 
52 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
53 
54 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55 // changed their type from v4f32 to v2i64.
56 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
57                                   Function *&NewFn) {
58   // Check whether this is an old version of the function, which received
59   // v4f32 arguments.
60   Type *Arg0Type = F->getFunctionType()->getParamType(0);
61   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62     return false;
63 
64   // Yes, it's old, replace it with new version.
65   rename(F);
66   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67   return true;
68 }
69 
70 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71 // arguments have changed their type from i32 to i8.
72 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
73                                              Function *&NewFn) {
74   // Check that the last argument is an i32.
75   Type *LastArgType = F->getFunctionType()->getParamType(
76      F->getFunctionType()->getNumParams() - 1);
77   if (!LastArgType->isIntegerTy(32))
78     return false;
79 
80   // Move this function aside and map down.
81   rename(F);
82   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83   return true;
84 }
85 
86 // Upgrade the declaration of fp compare intrinsics that change return type
87 // from scalar to vXi1 mask.
88 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
89                                       Function *&NewFn) {
90   // Check if the return type is a vector.
91   if (F->getReturnType()->isVectorTy())
92     return false;
93 
94   rename(F);
95   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96   return true;
97 }
98 
99 static bool UpgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
100                                     Function *&NewFn) {
101   if (F->getReturnType()->getScalarType()->isBFloatTy())
102     return false;
103 
104   rename(F);
105   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106   return true;
107 }
108 
109 static bool UpgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
110                                       Function *&NewFn) {
111   if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112     return false;
113 
114   rename(F);
115   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116   return true;
117 }
118 
119 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
120   // All of the intrinsics matches below should be marked with which llvm
121   // version started autoupgrading them. At some point in the future we would
122   // like to use this information to remove upgrade code for some older
123   // intrinsics. It is currently undecided how we will determine that future
124   // point.
125   if (Name == "addcarryx.u32" || // Added in 8.0
126       Name == "addcarryx.u64" || // Added in 8.0
127       Name == "addcarry.u32" || // Added in 8.0
128       Name == "addcarry.u64" || // Added in 8.0
129       Name == "subborrow.u32" || // Added in 8.0
130       Name == "subborrow.u64" || // Added in 8.0
131       Name.startswith("sse2.padds.") || // Added in 8.0
132       Name.startswith("sse2.psubs.") || // Added in 8.0
133       Name.startswith("sse2.paddus.") || // Added in 8.0
134       Name.startswith("sse2.psubus.") || // Added in 8.0
135       Name.startswith("avx2.padds.") || // Added in 8.0
136       Name.startswith("avx2.psubs.") || // Added in 8.0
137       Name.startswith("avx2.paddus.") || // Added in 8.0
138       Name.startswith("avx2.psubus.") || // Added in 8.0
139       Name.startswith("avx512.padds.") || // Added in 8.0
140       Name.startswith("avx512.psubs.") || // Added in 8.0
141       Name.startswith("avx512.mask.padds.") || // Added in 8.0
142       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
143       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
144       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
145       Name=="ssse3.pabs.b.128" || // Added in 6.0
146       Name=="ssse3.pabs.w.128" || // Added in 6.0
147       Name=="ssse3.pabs.d.128" || // Added in 6.0
148       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
149       Name.startswith("fma.vfmadd.") || // Added in 7.0
150       Name.startswith("fma.vfmsub.") || // Added in 7.0
151       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
152       Name.startswith("fma.vfnmadd.") || // Added in 7.0
153       Name.startswith("fma.vfnmsub.") || // Added in 7.0
154       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
155       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
156       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
157       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
158       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
159       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
160       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
161       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
162       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
163       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
164       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
165       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
166       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
167       Name.startswith("avx512.kunpck") || //added in 6.0
168       Name.startswith("avx2.pabs.") || // Added in 6.0
169       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
170       Name.startswith("avx512.broadcastm") || // Added in 6.0
171       Name == "sse.sqrt.ss" || // Added in 7.0
172       Name == "sse2.sqrt.sd" || // Added in 7.0
173       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
174       Name.startswith("avx.sqrt.p") || // Added in 7.0
175       Name.startswith("sse2.sqrt.p") || // Added in 7.0
176       Name.startswith("sse.sqrt.p") || // Added in 7.0
177       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
178       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
179       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
180       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
181       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
182       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
183       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
184       Name.startswith("avx.vperm2f128.") || // Added in 6.0
185       Name == "avx2.vperm2i128" || // Added in 6.0
186       Name == "sse.add.ss" || // Added in 4.0
187       Name == "sse2.add.sd" || // Added in 4.0
188       Name == "sse.sub.ss" || // Added in 4.0
189       Name == "sse2.sub.sd" || // Added in 4.0
190       Name == "sse.mul.ss" || // Added in 4.0
191       Name == "sse2.mul.sd" || // Added in 4.0
192       Name == "sse.div.ss" || // Added in 4.0
193       Name == "sse2.div.sd" || // Added in 4.0
194       Name == "sse41.pmaxsb" || // Added in 3.9
195       Name == "sse2.pmaxs.w" || // Added in 3.9
196       Name == "sse41.pmaxsd" || // Added in 3.9
197       Name == "sse2.pmaxu.b" || // Added in 3.9
198       Name == "sse41.pmaxuw" || // Added in 3.9
199       Name == "sse41.pmaxud" || // Added in 3.9
200       Name == "sse41.pminsb" || // Added in 3.9
201       Name == "sse2.pmins.w" || // Added in 3.9
202       Name == "sse41.pminsd" || // Added in 3.9
203       Name == "sse2.pminu.b" || // Added in 3.9
204       Name == "sse41.pminuw" || // Added in 3.9
205       Name == "sse41.pminud" || // Added in 3.9
206       Name == "avx512.kand.w" || // Added in 7.0
207       Name == "avx512.kandn.w" || // Added in 7.0
208       Name == "avx512.knot.w" || // Added in 7.0
209       Name == "avx512.kor.w" || // Added in 7.0
210       Name == "avx512.kxor.w" || // Added in 7.0
211       Name == "avx512.kxnor.w" || // Added in 7.0
212       Name == "avx512.kortestc.w" || // Added in 7.0
213       Name == "avx512.kortestz.w" || // Added in 7.0
214       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
215       Name.startswith("avx2.pmax") || // Added in 3.9
216       Name.startswith("avx2.pmin") || // Added in 3.9
217       Name.startswith("avx512.mask.pmax") || // Added in 4.0
218       Name.startswith("avx512.mask.pmin") || // Added in 4.0
219       Name.startswith("avx2.vbroadcast") || // Added in 3.8
220       Name.startswith("avx2.pbroadcast") || // Added in 3.8
221       Name.startswith("avx.vpermil.") || // Added in 3.1
222       Name.startswith("sse2.pshuf") || // Added in 3.9
223       Name.startswith("avx512.pbroadcast") || // Added in 3.9
224       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
225       Name.startswith("avx512.mask.movddup") || // Added in 3.9
226       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
227       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
228       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
229       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
230       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
231       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
232       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
233       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
234       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
235       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
236       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
237       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
238       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
239       Name.startswith("avx512.mask.pand.") || // Added in 3.9
240       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
241       Name.startswith("avx512.mask.por.") || // Added in 3.9
242       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
243       Name.startswith("avx512.mask.and.") || // Added in 3.9
244       Name.startswith("avx512.mask.andn.") || // Added in 3.9
245       Name.startswith("avx512.mask.or.") || // Added in 3.9
246       Name.startswith("avx512.mask.xor.") || // Added in 3.9
247       Name.startswith("avx512.mask.padd.") || // Added in 4.0
248       Name.startswith("avx512.mask.psub.") || // Added in 4.0
249       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
250       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
251       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
252       Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
253       Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
254       Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
255       Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
256       Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
257       Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
258       Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
259       Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
260       Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
261       Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
262       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
263       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
264       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
265       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
266       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
267       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
268       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
269       Name == "avx512.cvtusi2sd" || // Added in 7.0
270       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
271       Name == "sse2.pmulu.dq" || // Added in 7.0
272       Name == "sse41.pmuldq" || // Added in 7.0
273       Name == "avx2.pmulu.dq" || // Added in 7.0
274       Name == "avx2.pmul.dq" || // Added in 7.0
275       Name == "avx512.pmulu.dq.512" || // Added in 7.0
276       Name == "avx512.pmul.dq.512" || // Added in 7.0
277       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
278       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
279       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
280       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
281       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
282       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
283       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
284       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
285       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
286       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
287       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
288       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
289       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
290       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
291       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
292       Name.startswith("avx512.cmp.p") || // Added in 12.0
293       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
294       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
295       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
296       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
297       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
298       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
299       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
300       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
301       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
302       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
303       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
304       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
305       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
306       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
307       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
308       Name.startswith("avx512.mask.pslli") || // Added in 4.0
309       Name.startswith("avx512.mask.psrai") || // Added in 4.0
310       Name.startswith("avx512.mask.psrli") || // Added in 4.0
311       Name.startswith("avx512.mask.psllv") || // Added in 4.0
312       Name.startswith("avx512.mask.psrav") || // Added in 4.0
313       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
314       Name.startswith("sse41.pmovsx") || // Added in 3.8
315       Name.startswith("sse41.pmovzx") || // Added in 3.9
316       Name.startswith("avx2.pmovsx") || // Added in 3.9
317       Name.startswith("avx2.pmovzx") || // Added in 3.9
318       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
319       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
320       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
321       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
322       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
323       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
324       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
325       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
326       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
327       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
328       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
329       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
330       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
331       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
332       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
333       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
334       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
335       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
336       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
337       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
338       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
339       Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
340       Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
341       Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
342       Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
343       Name.startswith("avx512.vpshld.") || // Added in 8.0
344       Name.startswith("avx512.vpshrd.") || // Added in 8.0
345       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
346       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
347       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
348       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
349       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
350       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
351       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
352       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
353       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
354       Name.startswith("avx512.mask.conflict.") || // Added in 9.0
355       Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
356       Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
357       Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
358       Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
359       Name == "sse.cvtsi2ss" || // Added in 7.0
360       Name == "sse.cvtsi642ss" || // Added in 7.0
361       Name == "sse2.cvtsi2sd" || // Added in 7.0
362       Name == "sse2.cvtsi642sd" || // Added in 7.0
363       Name == "sse2.cvtss2sd" || // Added in 7.0
364       Name == "sse2.cvtdq2pd" || // Added in 3.9
365       Name == "sse2.cvtdq2ps" || // Added in 7.0
366       Name == "sse2.cvtps2pd" || // Added in 3.9
367       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
368       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
369       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
370       Name.startswith("vcvtph2ps.") || // Added in 11.0
371       Name.startswith("avx.vinsertf128.") || // Added in 3.7
372       Name == "avx2.vinserti128" || // Added in 3.7
373       Name.startswith("avx512.mask.insert") || // Added in 4.0
374       Name.startswith("avx.vextractf128.") || // Added in 3.7
375       Name == "avx2.vextracti128" || // Added in 3.7
376       Name.startswith("avx512.mask.vextract") || // Added in 4.0
377       Name.startswith("sse4a.movnt.") || // Added in 3.9
378       Name.startswith("avx.movnt.") || // Added in 3.2
379       Name.startswith("avx512.storent.") || // Added in 3.9
380       Name == "sse41.movntdqa" || // Added in 5.0
381       Name == "avx2.movntdqa" || // Added in 5.0
382       Name == "avx512.movntdqa" || // Added in 5.0
383       Name == "sse2.storel.dq" || // Added in 3.9
384       Name.startswith("sse.storeu.") || // Added in 3.9
385       Name.startswith("sse2.storeu.") || // Added in 3.9
386       Name.startswith("avx.storeu.") || // Added in 3.9
387       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
388       Name.startswith("avx512.mask.store.p") || // Added in 3.9
389       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
390       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
391       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
392       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
393       Name == "avx512.mask.store.ss" || // Added in 7.0
394       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
395       Name.startswith("avx512.mask.load.") || // Added in 3.9
396       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
397       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
398       Name.startswith("avx512.mask.expand.b") || // Added in 9.0
399       Name.startswith("avx512.mask.expand.w") || // Added in 9.0
400       Name.startswith("avx512.mask.expand.d") || // Added in 9.0
401       Name.startswith("avx512.mask.expand.q") || // Added in 9.0
402       Name.startswith("avx512.mask.expand.p") || // Added in 9.0
403       Name.startswith("avx512.mask.compress.b") || // Added in 9.0
404       Name.startswith("avx512.mask.compress.w") || // Added in 9.0
405       Name.startswith("avx512.mask.compress.d") || // Added in 9.0
406       Name.startswith("avx512.mask.compress.q") || // Added in 9.0
407       Name.startswith("avx512.mask.compress.p") || // Added in 9.0
408       Name == "sse42.crc32.64.8" || // Added in 3.4
409       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
410       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
411       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
412       Name.startswith("avx512.mask.valign.") || // Added in 4.0
413       Name.startswith("sse2.psll.dq") || // Added in 3.7
414       Name.startswith("sse2.psrl.dq") || // Added in 3.7
415       Name.startswith("avx2.psll.dq") || // Added in 3.7
416       Name.startswith("avx2.psrl.dq") || // Added in 3.7
417       Name.startswith("avx512.psll.dq") || // Added in 3.9
418       Name.startswith("avx512.psrl.dq") || // Added in 3.9
419       Name == "sse41.pblendw" || // Added in 3.7
420       Name.startswith("sse41.blendp") || // Added in 3.7
421       Name.startswith("avx.blend.p") || // Added in 3.7
422       Name == "avx2.pblendw" || // Added in 3.7
423       Name.startswith("avx2.pblendd.") || // Added in 3.7
424       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
425       Name == "avx2.vbroadcasti128" || // Added in 3.7
426       Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
427       Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
428       Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
429       Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
430       Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
431       Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
432       Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
433       Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
434       Name == "xop.vpcmov" || // Added in 3.8
435       Name == "xop.vpcmov.256" || // Added in 5.0
436       Name.startswith("avx512.mask.move.s") || // Added in 4.0
437       Name.startswith("avx512.cvtmask2") || // Added in 5.0
438       Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
439       Name.startswith("xop.vprot") || // Added in 8.0
440       Name.startswith("avx512.prol") || // Added in 8.0
441       Name.startswith("avx512.pror") || // Added in 8.0
442       Name.startswith("avx512.mask.prorv.") || // Added in 8.0
443       Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
444       Name.startswith("avx512.mask.prolv.") || // Added in 8.0
445       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
446       Name.startswith("avx512.ptestm") || //Added in 6.0
447       Name.startswith("avx512.ptestnm") || //Added in 6.0
448       Name.startswith("avx512.mask.pavg")) // Added in 6.0
449     return true;
450 
451   return false;
452 }
453 
454 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
455                                         Function *&NewFn) {
456   // Only handle intrinsics that start with "x86.".
457   if (!Name.startswith("x86."))
458     return false;
459   // Remove "x86." prefix.
460   Name = Name.substr(4);
461 
462   if (ShouldUpgradeX86Intrinsic(F, Name)) {
463     NewFn = nullptr;
464     return true;
465   }
466 
467   if (Name == "rdtscp") { // Added in 8.0
468     // If this intrinsic has 0 operands, it's the new version.
469     if (F->getFunctionType()->getNumParams() == 0)
470       return false;
471 
472     rename(F);
473     NewFn = Intrinsic::getDeclaration(F->getParent(),
474                                       Intrinsic::x86_rdtscp);
475     return true;
476   }
477 
478   // SSE4.1 ptest functions may have an old signature.
479   if (Name.startswith("sse41.ptest")) { // Added in 3.2
480     if (Name.substr(11) == "c")
481       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
482     if (Name.substr(11) == "z")
483       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
484     if (Name.substr(11) == "nzc")
485       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
486   }
487   // Several blend and other instructions with masks used the wrong number of
488   // bits.
489   if (Name == "sse41.insertps") // Added in 3.6
490     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
491                                             NewFn);
492   if (Name == "sse41.dppd") // Added in 3.6
493     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
494                                             NewFn);
495   if (Name == "sse41.dpps") // Added in 3.6
496     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
497                                             NewFn);
498   if (Name == "sse41.mpsadbw") // Added in 3.6
499     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
500                                             NewFn);
501   if (Name == "avx.dp.ps.256") // Added in 3.6
502     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
503                                             NewFn);
504   if (Name == "avx2.mpsadbw") // Added in 3.6
505     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
506                                             NewFn);
507   if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
508     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
509                                      NewFn);
510   if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
511     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
512                                      NewFn);
513   if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
514     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
515                                      NewFn);
516   if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
517     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
518                                      NewFn);
519   if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
520     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
521                                      NewFn);
522   if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
523     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
524                                      NewFn);
525   if (Name == "avx512bf16.cvtne2ps2bf16.128") // Added in 9.0
526     return UpgradeX86BF16Intrinsic(
527         F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128, NewFn);
528   if (Name == "avx512bf16.cvtne2ps2bf16.256") // Added in 9.0
529     return UpgradeX86BF16Intrinsic(
530         F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256, NewFn);
531   if (Name == "avx512bf16.cvtne2ps2bf16.512") // Added in 9.0
532     return UpgradeX86BF16Intrinsic(
533         F, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512, NewFn);
534   if (Name == "avx512bf16.mask.cvtneps2bf16.128") // Added in 9.0
535     return UpgradeX86BF16Intrinsic(
536         F, Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128, NewFn);
537   if (Name == "avx512bf16.cvtneps2bf16.256") // Added in 9.0
538     return UpgradeX86BF16Intrinsic(
539         F, Intrinsic::x86_avx512bf16_cvtneps2bf16_256, NewFn);
540   if (Name == "avx512bf16.cvtneps2bf16.512") // Added in 9.0
541     return UpgradeX86BF16Intrinsic(
542         F, Intrinsic::x86_avx512bf16_cvtneps2bf16_512, NewFn);
543   if (Name == "avx512bf16.dpbf16ps.128") // Added in 9.0
544     return UpgradeX86BF16DPIntrinsic(
545         F, Intrinsic::x86_avx512bf16_dpbf16ps_128, NewFn);
546   if (Name == "avx512bf16.dpbf16ps.256") // Added in 9.0
547     return UpgradeX86BF16DPIntrinsic(
548         F, Intrinsic::x86_avx512bf16_dpbf16ps_256, NewFn);
549   if (Name == "avx512bf16.dpbf16ps.512") // Added in 9.0
550     return UpgradeX86BF16DPIntrinsic(
551         F, Intrinsic::x86_avx512bf16_dpbf16ps_512, NewFn);
552 
553   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
554   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
555     rename(F);
556     NewFn = Intrinsic::getDeclaration(F->getParent(),
557                                       Intrinsic::x86_xop_vfrcz_ss);
558     return true;
559   }
560   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
561     rename(F);
562     NewFn = Intrinsic::getDeclaration(F->getParent(),
563                                       Intrinsic::x86_xop_vfrcz_sd);
564     return true;
565   }
566   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
567   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
568     auto Idx = F->getFunctionType()->getParamType(2);
569     if (Idx->isFPOrFPVectorTy()) {
570       rename(F);
571       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
572       unsigned EltSize = Idx->getScalarSizeInBits();
573       Intrinsic::ID Permil2ID;
574       if (EltSize == 64 && IdxSize == 128)
575         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
576       else if (EltSize == 32 && IdxSize == 128)
577         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
578       else if (EltSize == 64 && IdxSize == 256)
579         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
580       else
581         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
582       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
583       return true;
584     }
585   }
586 
587   if (Name == "seh.recoverfp") {
588     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
589     return true;
590   }
591 
592   return false;
593 }
594 
595 static Intrinsic::ID ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
596   return StringSwitch<Intrinsic::ID>(Name)
597       .Case("abs.bf16", Intrinsic::nvvm_abs_bf16)
598       .Case("abs.bf16x2", Intrinsic::nvvm_abs_bf16x2)
599       .Case("fma.rn.bf16", Intrinsic::nvvm_fma_rn_bf16)
600       .Case("fma.rn.bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
601       .Case("fma.rn.ftz_bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
602       .Case("fma.rn.ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
603       .Case("fma.rn.ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
604       .Case("fma.rn.ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
605       .Case("fma.rn.ftz_sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
606       .Case("fma.rn.ftz_sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
607       .Case("fma.rn.relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
608       .Case("fma.rn.relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
609       .Case("fma.rn.sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
610       .Case("fma.rn.sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
611       .Case("fmax.bf16", Intrinsic::nvvm_fmax_bf16)
612       .Case("fmax.bf16x2", Intrinsic::nvvm_fmax_bf16x2)
613       .Case("fmax.ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
614       .Case("fmax.ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
615       .Case("fmax.ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
616       .Case("fmax.ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
617       .Case("fmax.ftz.nan.xorsign.abs.bf16",
618             Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
619       .Case("fmax.ftz.nan.xorsign.abs.bf16x2",
620             Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
621       .Case("fmax.ftz.xorsign.abs.bf16",
622             Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
623       .Case("fmax.ftz.xorsign.abs.bf16x2",
624             Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
625       .Case("fmax.nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
626       .Case("fmax.nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
627       .Case("fmax.nan.xorsign.abs.bf16",
628             Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
629       .Case("fmax.nan.xorsign.abs.bf16x2",
630             Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
631       .Case("fmax.xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
632       .Case("fmax.xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
633       .Case("fmin.bf16", Intrinsic::nvvm_fmin_bf16)
634       .Case("fmin.bf16x2", Intrinsic::nvvm_fmin_bf16x2)
635       .Case("fmin.ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
636       .Case("fmin.ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
637       .Case("fmin.ftz.nan_bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
638       .Case("fmin.ftz.nan_bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
639       .Case("fmin.ftz.nan.xorsign.abs.bf16",
640             Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
641       .Case("fmin.ftz.nan.xorsign.abs.bf16x2",
642             Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
643       .Case("fmin.ftz.xorsign.abs.bf16",
644             Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
645       .Case("fmin.ftz.xorsign.abs.bf16x2",
646             Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
647       .Case("fmin.nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
648       .Case("fmin.nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
649       .Case("fmin.nan.xorsign.abs.bf16",
650             Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
651       .Case("fmin.nan.xorsign.abs.bf16x2",
652             Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
653       .Case("fmin.xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
654       .Case("fmin.xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
655       .Case("neg.bf16", Intrinsic::nvvm_neg_bf16)
656       .Case("neg.bf16x2", Intrinsic::nvvm_neg_bf16x2)
657       .Default(Intrinsic::not_intrinsic);
658 }
659 
660 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
661   assert(F && "Illegal to upgrade a non-existent Function.");
662 
663   // Quickly eliminate it, if it's not a candidate.
664   StringRef Name = F->getName();
665   if (Name.size() <= 7 || !Name.startswith("llvm."))
666     return false;
667   Name = Name.substr(5); // Strip off "llvm."
668 
669   switch (Name[0]) {
670   default: break;
671   case 'a': {
672     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
673       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
674                                         F->arg_begin()->getType());
675       return true;
676     }
677     if (Name.startswith("aarch64.neon.frintn")) {
678       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
679                                         F->arg_begin()->getType());
680       return true;
681     }
682     if (Name.startswith("aarch64.neon.rbit")) {
683       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
684                                         F->arg_begin()->getType());
685       return true;
686     }
687     if (Name == "aarch64.sve.bfdot.lane") {
688       NewFn = Intrinsic::getDeclaration(F->getParent(),
689                                         Intrinsic::aarch64_sve_bfdot_lane_v2);
690       return true;
691     }
692     if (Name == "aarch64.sve.bfmlalb.lane") {
693       NewFn = Intrinsic::getDeclaration(F->getParent(),
694                                         Intrinsic::aarch64_sve_bfmlalb_lane_v2);
695       return true;
696     }
697     if (Name == "aarch64.sve.bfmlalt.lane") {
698       NewFn = Intrinsic::getDeclaration(F->getParent(),
699                                         Intrinsic::aarch64_sve_bfmlalt_lane_v2);
700       return true;
701     }
702     static const Regex LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)");
703     if (LdRegex.match(Name)) {
704       Type *ScalarTy =
705           dyn_cast<VectorType>(F->getReturnType())->getElementType();
706       ElementCount EC =
707           dyn_cast<VectorType>(F->arg_begin()->getType())->getElementCount();
708       Type *Ty = VectorType::get(ScalarTy, EC);
709       Intrinsic::ID ID =
710           StringSwitch<Intrinsic::ID>(Name)
711               .StartsWith("aarch64.sve.ld2", Intrinsic::aarch64_sve_ld2_sret)
712               .StartsWith("aarch64.sve.ld3", Intrinsic::aarch64_sve_ld3_sret)
713               .StartsWith("aarch64.sve.ld4", Intrinsic::aarch64_sve_ld4_sret)
714               .Default(Intrinsic::not_intrinsic);
715       NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty);
716       return true;
717     }
718     if (Name.startswith("aarch64.sve.tuple.get")) {
719       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
720       NewFn = Intrinsic::getDeclaration(F->getParent(),
721                                         Intrinsic::vector_extract, Tys);
722       return true;
723     }
724     if (Name.startswith("aarch64.sve.tuple.set")) {
725       auto Args = F->getFunctionType()->params();
726       Type *Tys[] = {Args[0], Args[2], Args[1]};
727       NewFn = Intrinsic::getDeclaration(F->getParent(),
728                                         Intrinsic::vector_insert, Tys);
729       return true;
730     }
731     static const Regex CreateTupleRegex(
732         "^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)");
733     if (CreateTupleRegex.match(Name)) {
734       auto Args = F->getFunctionType()->params();
735       Type *Tys[] = {F->getReturnType(), Args[1]};
736       NewFn = Intrinsic::getDeclaration(F->getParent(),
737                                         Intrinsic::vector_insert, Tys);
738       return true;
739     }
740     if (Name.startswith("arm.neon.vclz")) {
741       Type* args[2] = {
742         F->arg_begin()->getType(),
743         Type::getInt1Ty(F->getContext())
744       };
745       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
746       // the end of the name. Change name from llvm.arm.neon.vclz.* to
747       //  llvm.ctlz.*
748       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
749       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
750                                "llvm.ctlz." + Name.substr(14), F->getParent());
751       return true;
752     }
753     if (Name.startswith("arm.neon.vcnt")) {
754       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
755                                         F->arg_begin()->getType());
756       return true;
757     }
758     static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
759     if (vstRegex.match(Name)) {
760       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
761                                                 Intrinsic::arm_neon_vst2,
762                                                 Intrinsic::arm_neon_vst3,
763                                                 Intrinsic::arm_neon_vst4};
764 
765       static const Intrinsic::ID StoreLaneInts[] = {
766         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
767         Intrinsic::arm_neon_vst4lane
768       };
769 
770       auto fArgs = F->getFunctionType()->params();
771       Type *Tys[] = {fArgs[0], fArgs[1]};
772       if (!Name.contains("lane"))
773         NewFn = Intrinsic::getDeclaration(F->getParent(),
774                                           StoreInts[fArgs.size() - 3], Tys);
775       else
776         NewFn = Intrinsic::getDeclaration(F->getParent(),
777                                           StoreLaneInts[fArgs.size() - 5], Tys);
778       return true;
779     }
780     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
781       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
782       return true;
783     }
784     if (Name.startswith("arm.neon.vqadds.")) {
785       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
786                                         F->arg_begin()->getType());
787       return true;
788     }
789     if (Name.startswith("arm.neon.vqaddu.")) {
790       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
791                                         F->arg_begin()->getType());
792       return true;
793     }
794     if (Name.startswith("arm.neon.vqsubs.")) {
795       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
796                                         F->arg_begin()->getType());
797       return true;
798     }
799     if (Name.startswith("arm.neon.vqsubu.")) {
800       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
801                                         F->arg_begin()->getType());
802       return true;
803     }
804     if (Name.startswith("aarch64.neon.addp")) {
805       if (F->arg_size() != 2)
806         break; // Invalid IR.
807       VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
808       if (Ty && Ty->getElementType()->isFloatingPointTy()) {
809         NewFn = Intrinsic::getDeclaration(F->getParent(),
810                                           Intrinsic::aarch64_neon_faddp, Ty);
811         return true;
812       }
813     }
814 
815     // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
816     // respectively
817     if ((Name.startswith("arm.neon.bfdot.") ||
818          Name.startswith("aarch64.neon.bfdot.")) &&
819         Name.endswith("i8")) {
820       Intrinsic::ID IID =
821           StringSwitch<Intrinsic::ID>(Name)
822               .Cases("arm.neon.bfdot.v2f32.v8i8",
823                      "arm.neon.bfdot.v4f32.v16i8",
824                      Intrinsic::arm_neon_bfdot)
825               .Cases("aarch64.neon.bfdot.v2f32.v8i8",
826                      "aarch64.neon.bfdot.v4f32.v16i8",
827                      Intrinsic::aarch64_neon_bfdot)
828               .Default(Intrinsic::not_intrinsic);
829       if (IID == Intrinsic::not_intrinsic)
830         break;
831 
832       size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
833       assert((OperandWidth == 64 || OperandWidth == 128) &&
834              "Unexpected operand width");
835       LLVMContext &Ctx = F->getParent()->getContext();
836       std::array<Type *, 2> Tys {{
837         F->getReturnType(),
838         FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
839       }};
840       NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
841       return true;
842     }
843 
844     // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
845     // and accept v8bf16 instead of v16i8
846     if ((Name.startswith("arm.neon.bfm") ||
847          Name.startswith("aarch64.neon.bfm")) &&
848         Name.endswith(".v4f32.v16i8")) {
849       Intrinsic::ID IID =
850           StringSwitch<Intrinsic::ID>(Name)
851               .Case("arm.neon.bfmmla.v4f32.v16i8",
852                     Intrinsic::arm_neon_bfmmla)
853               .Case("arm.neon.bfmlalb.v4f32.v16i8",
854                     Intrinsic::arm_neon_bfmlalb)
855               .Case("arm.neon.bfmlalt.v4f32.v16i8",
856                     Intrinsic::arm_neon_bfmlalt)
857               .Case("aarch64.neon.bfmmla.v4f32.v16i8",
858                     Intrinsic::aarch64_neon_bfmmla)
859               .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
860                     Intrinsic::aarch64_neon_bfmlalb)
861               .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
862                     Intrinsic::aarch64_neon_bfmlalt)
863               .Default(Intrinsic::not_intrinsic);
864       if (IID == Intrinsic::not_intrinsic)
865         break;
866 
867       std::array<Type *, 0> Tys;
868       NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
869       return true;
870     }
871 
872     if (Name == "arm.mve.vctp64" &&
873         cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
874       // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
875       // function and deal with it below in UpgradeIntrinsicCall.
876       rename(F);
877       return true;
878     }
879     // These too are changed to accept a v2i1 instead of the old v4i1.
880     if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
881         Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
882         Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
883         Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
884         Name ==
885             "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
886         Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
887         Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
888         Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
889         Name ==
890             "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
891         Name == "arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
892         Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
893         Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
894         Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
895         Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
896         Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
897         Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
898       return true;
899 
900     if (Name.startswith("amdgcn."))
901       Name = Name.substr(7); // Strip off "amdgcn."
902 
903     if (Name == "alignbit") {
904       // Target specific intrinsic became redundant
905       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
906                                         {F->getReturnType()});
907       return true;
908     }
909 
910     if (Name.startswith("atomic.inc") || Name.startswith("atomic.dec")) {
911       // This was replaced with atomicrmw uinc_wrap and udec_wrap, so there's no
912       // new declaration.
913       NewFn = nullptr;
914       return true;
915     }
916 
917     break;
918   }
919   case 'c': {
920     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
921       rename(F);
922       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
923                                         F->arg_begin()->getType());
924       return true;
925     }
926     if (Name.startswith("cttz.") && F->arg_size() == 1) {
927       rename(F);
928       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
929                                         F->arg_begin()->getType());
930       return true;
931     }
932     break;
933   }
934   case 'd': {
935     if (Name == "dbg.addr") {
936       rename(F);
937       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
938       return true;
939     }
940     if (Name == "dbg.value" && F->arg_size() == 4) {
941       rename(F);
942       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
943       return true;
944     }
945     break;
946   }
947   case 'e': {
948     if (Name.startswith("experimental.vector.extract.")) {
949       rename(F);
950       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
951       NewFn = Intrinsic::getDeclaration(F->getParent(),
952                                         Intrinsic::vector_extract, Tys);
953       return true;
954     }
955 
956     if (Name.startswith("experimental.vector.insert.")) {
957       rename(F);
958       auto Args = F->getFunctionType()->params();
959       Type *Tys[] = {Args[0], Args[1]};
960       NewFn = Intrinsic::getDeclaration(F->getParent(),
961                                         Intrinsic::vector_insert, Tys);
962       return true;
963     }
964 
965     SmallVector<StringRef, 2> Groups;
966     static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
967     if (R.match(Name, &Groups)) {
968       Intrinsic::ID ID;
969       ID = StringSwitch<Intrinsic::ID>(Groups[1])
970                .Case("add", Intrinsic::vector_reduce_add)
971                .Case("mul", Intrinsic::vector_reduce_mul)
972                .Case("and", Intrinsic::vector_reduce_and)
973                .Case("or", Intrinsic::vector_reduce_or)
974                .Case("xor", Intrinsic::vector_reduce_xor)
975                .Case("smax", Intrinsic::vector_reduce_smax)
976                .Case("smin", Intrinsic::vector_reduce_smin)
977                .Case("umax", Intrinsic::vector_reduce_umax)
978                .Case("umin", Intrinsic::vector_reduce_umin)
979                .Case("fmax", Intrinsic::vector_reduce_fmax)
980                .Case("fmin", Intrinsic::vector_reduce_fmin)
981                .Default(Intrinsic::not_intrinsic);
982       if (ID != Intrinsic::not_intrinsic) {
983         rename(F);
984         auto Args = F->getFunctionType()->params();
985         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
986         return true;
987       }
988     }
989     static const Regex R2(
990         "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
991     Groups.clear();
992     if (R2.match(Name, &Groups)) {
993       Intrinsic::ID ID = Intrinsic::not_intrinsic;
994       if (Groups[1] == "fadd")
995         ID = Intrinsic::vector_reduce_fadd;
996       if (Groups[1] == "fmul")
997         ID = Intrinsic::vector_reduce_fmul;
998       if (ID != Intrinsic::not_intrinsic) {
999         rename(F);
1000         auto Args = F->getFunctionType()->params();
1001         Type *Tys[] = {Args[1]};
1002         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1003         return true;
1004       }
1005     }
1006     break;
1007   }
1008   case 'f':
1009     if (Name.startswith("flt.rounds")) {
1010       rename(F);
1011       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1012       return true;
1013     }
1014     break;
1015   case 'i':
1016   case 'l': {
1017     bool IsLifetimeStart = Name.startswith("lifetime.start");
1018     if (IsLifetimeStart || Name.startswith("invariant.start")) {
1019       Intrinsic::ID ID = IsLifetimeStart ?
1020         Intrinsic::lifetime_start : Intrinsic::invariant_start;
1021       auto Args = F->getFunctionType()->params();
1022       Type* ObjectPtr[1] = {Args[1]};
1023       if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
1024         rename(F);
1025         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
1026         return true;
1027       }
1028     }
1029 
1030     bool IsLifetimeEnd = Name.startswith("lifetime.end");
1031     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
1032       Intrinsic::ID ID = IsLifetimeEnd ?
1033         Intrinsic::lifetime_end : Intrinsic::invariant_end;
1034 
1035       auto Args = F->getFunctionType()->params();
1036       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
1037       if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
1038         rename(F);
1039         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
1040         return true;
1041       }
1042     }
1043     if (Name.startswith("invariant.group.barrier")) {
1044       // Rename invariant.group.barrier to launder.invariant.group
1045       auto Args = F->getFunctionType()->params();
1046       Type* ObjectPtr[1] = {Args[0]};
1047       rename(F);
1048       NewFn = Intrinsic::getDeclaration(F->getParent(),
1049           Intrinsic::launder_invariant_group, ObjectPtr);
1050       return true;
1051 
1052     }
1053 
1054     break;
1055   }
1056   case 'm': {
1057     if (Name.startswith("masked.load.")) {
1058       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
1059       if (F->getName() !=
1060           Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
1061         rename(F);
1062         NewFn = Intrinsic::getDeclaration(F->getParent(),
1063                                           Intrinsic::masked_load,
1064                                           Tys);
1065         return true;
1066       }
1067     }
1068     if (Name.startswith("masked.store.")) {
1069       auto Args = F->getFunctionType()->params();
1070       Type *Tys[] = { Args[0], Args[1] };
1071       if (F->getName() !=
1072           Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
1073         rename(F);
1074         NewFn = Intrinsic::getDeclaration(F->getParent(),
1075                                           Intrinsic::masked_store,
1076                                           Tys);
1077         return true;
1078       }
1079     }
1080     // Renaming gather/scatter intrinsics with no address space overloading
1081     // to the new overload which includes an address space
1082     if (Name.startswith("masked.gather.")) {
1083       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1084       if (F->getName() !=
1085           Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
1086         rename(F);
1087         NewFn = Intrinsic::getDeclaration(F->getParent(),
1088                                           Intrinsic::masked_gather, Tys);
1089         return true;
1090       }
1091     }
1092     if (Name.startswith("masked.scatter.")) {
1093       auto Args = F->getFunctionType()->params();
1094       Type *Tys[] = {Args[0], Args[1]};
1095       if (F->getName() !=
1096           Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
1097         rename(F);
1098         NewFn = Intrinsic::getDeclaration(F->getParent(),
1099                                           Intrinsic::masked_scatter, Tys);
1100         return true;
1101       }
1102     }
1103     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1104     // alignment parameter to embedding the alignment as an attribute of
1105     // the pointer args.
1106     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
1107       rename(F);
1108       // Get the types of dest, src, and len
1109       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
1110       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
1111                                         ParamTypes);
1112       return true;
1113     }
1114     if (Name.startswith("memmove.") && F->arg_size() == 5) {
1115       rename(F);
1116       // Get the types of dest, src, and len
1117       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
1118       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
1119                                         ParamTypes);
1120       return true;
1121     }
1122     if (Name.startswith("memset.") && F->arg_size() == 5) {
1123       rename(F);
1124       // Get the types of dest, and len
1125       const auto *FT = F->getFunctionType();
1126       Type *ParamTypes[2] = {
1127           FT->getParamType(0), // Dest
1128           FT->getParamType(2)  // len
1129       };
1130       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1131                                         ParamTypes);
1132       return true;
1133     }
1134     break;
1135   }
1136   case 'n': {
1137     if (Name.startswith("nvvm.")) {
1138       Name = Name.substr(5);
1139 
1140       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
1141       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
1142                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
1143                               .Case("clz.i", Intrinsic::ctlz)
1144                               .Case("popc.i", Intrinsic::ctpop)
1145                               .Default(Intrinsic::not_intrinsic);
1146       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
1147         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1148                                           {F->getReturnType()});
1149         return true;
1150       }
1151       IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
1152       if (IID != Intrinsic::not_intrinsic &&
1153           !F->getReturnType()->getScalarType()->isBFloatTy()) {
1154         NewFn = nullptr;
1155         return true;
1156       }
1157       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1158       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
1159       //
1160       // TODO: We could add lohi.i2d.
1161       bool Expand = StringSwitch<bool>(Name)
1162                         .Cases("abs.i", "abs.ll", true)
1163                         .Cases("clz.ll", "popc.ll", "h2f", true)
1164                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
1165                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
1166                         .StartsWith("atomic.load.add.f32.p", true)
1167                         .StartsWith("atomic.load.add.f64.p", true)
1168                         .Default(false);
1169       if (Expand) {
1170         NewFn = nullptr;
1171         return true;
1172       }
1173     }
1174     break;
1175   }
1176   case 'o':
1177     // We only need to change the name to match the mangling including the
1178     // address space.
1179     if (Name.startswith("objectsize.")) {
1180       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1181       if (F->arg_size() == 2 || F->arg_size() == 3 ||
1182           F->getName() !=
1183               Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1184         rename(F);
1185         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1186                                           Tys);
1187         return true;
1188       }
1189     }
1190     break;
1191 
1192   case 'p':
1193     if (Name == "prefetch") {
1194       // Handle address space overloading.
1195       Type *Tys[] = {F->arg_begin()->getType()};
1196       if (F->getName() !=
1197           Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
1198         rename(F);
1199         NewFn =
1200             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
1201         return true;
1202       }
1203     } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
1204       rename(F);
1205       NewFn = Intrinsic::getDeclaration(
1206           F->getParent(), Intrinsic::ptr_annotation,
1207           {F->arg_begin()->getType(), F->getArg(1)->getType()});
1208       return true;
1209     }
1210     break;
1211 
1212   case 'r':
1213     if (Name == "riscv.aes32dsi" &&
1214         !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1215       rename(F);
1216       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32dsi);
1217       return true;
1218     }
1219     if (Name == "riscv.aes32dsmi" &&
1220         !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1221       rename(F);
1222       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32dsmi);
1223       return true;
1224     }
1225     if (Name == "riscv.aes32esi" &&
1226         !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1227       rename(F);
1228       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32esi);
1229       return true;
1230     }
1231     if (Name == "riscv.aes32esmi" &&
1232         !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1233       rename(F);
1234       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32esmi);
1235       return true;
1236     }
1237     if (Name.startswith("riscv.sm4ks") &&
1238         (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1239          F->getFunctionType()->getReturnType()->isIntegerTy(64))) {
1240       rename(F);
1241       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm4ks);
1242       return true;
1243     }
1244     if (Name.startswith("riscv.sm4ed") &&
1245         (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1246          F->getFunctionType()->getReturnType()->isIntegerTy(64))) {
1247       rename(F);
1248       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm4ed);
1249       return true;
1250     }
1251     if (Name.startswith("riscv.sha256sig0") &&
1252         F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1253       rename(F);
1254       NewFn = Intrinsic::getDeclaration(F->getParent(),
1255                                         Intrinsic::riscv_sha256sig0);
1256       return true;
1257     }
1258     if (Name.startswith("riscv.sha256sig1") &&
1259         F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1260       rename(F);
1261       NewFn = Intrinsic::getDeclaration(F->getParent(),
1262                                         Intrinsic::riscv_sha256sig1);
1263       return true;
1264     }
1265     if (Name.startswith("riscv.sha256sum0") &&
1266         F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1267       rename(F);
1268       NewFn = Intrinsic::getDeclaration(F->getParent(),
1269                                         Intrinsic::riscv_sha256sum0);
1270       return true;
1271     }
1272     if (Name.startswith("riscv.sha256sum1") &&
1273         F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1274       rename(F);
1275       NewFn = Intrinsic::getDeclaration(F->getParent(),
1276                                         Intrinsic::riscv_sha256sum1);
1277       return true;
1278     }
1279     if (Name.startswith("riscv.sm3p0") &&
1280         F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1281       rename(F);
1282       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm3p0);
1283       return true;
1284     }
1285     if (Name.startswith("riscv.sm3p1") &&
1286         F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1287       rename(F);
1288       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm3p1);
1289       return true;
1290     }
1291     break;
1292 
1293   case 's':
1294     if (Name == "stackprotectorcheck") {
1295       NewFn = nullptr;
1296       return true;
1297     }
1298     break;
1299 
1300   case 'v': {
1301     if (Name == "var.annotation" && F->arg_size() == 4) {
1302       rename(F);
1303       NewFn = Intrinsic::getDeclaration(
1304           F->getParent(), Intrinsic::var_annotation,
1305           {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1306       return true;
1307     }
1308     break;
1309   }
1310 
1311   case 'w':
1312     if (Name.startswith("wasm.fma.")) {
1313       rename(F);
1314       NewFn = Intrinsic::getDeclaration(
1315           F->getParent(), Intrinsic::wasm_relaxed_madd, F->getReturnType());
1316       return true;
1317     }
1318     if (Name.startswith("wasm.fms.")) {
1319       rename(F);
1320       NewFn = Intrinsic::getDeclaration(
1321           F->getParent(), Intrinsic::wasm_relaxed_nmadd, F->getReturnType());
1322       return true;
1323     }
1324     if (Name.startswith("wasm.laneselect.")) {
1325       rename(F);
1326       NewFn = Intrinsic::getDeclaration(
1327           F->getParent(), Intrinsic::wasm_relaxed_laneselect,
1328           F->getReturnType());
1329       return true;
1330     }
1331     if (Name == "wasm.dot.i8x16.i7x16.signed") {
1332       rename(F);
1333       NewFn = Intrinsic::getDeclaration(
1334           F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
1335       return true;
1336     }
1337     if (Name == "wasm.dot.i8x16.i7x16.add.signed") {
1338       rename(F);
1339       NewFn = Intrinsic::getDeclaration(
1340           F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
1341       return true;
1342     }
1343     break;
1344 
1345   case 'x':
1346     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1347       return true;
1348   }
1349 
1350   auto *ST = dyn_cast<StructType>(F->getReturnType());
1351   if (ST && (!ST->isLiteral() || ST->isPacked())) {
1352     // Replace return type with literal non-packed struct. Only do this for
1353     // intrinsics declared to return a struct, not for intrinsics with
1354     // overloaded return type, in which case the exact struct type will be
1355     // mangled into the name.
1356     SmallVector<Intrinsic::IITDescriptor> Desc;
1357     Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1358     if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1359       auto *FT = F->getFunctionType();
1360       auto *NewST = StructType::get(ST->getContext(), ST->elements());
1361       auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1362       std::string Name = F->getName().str();
1363       rename(F);
1364       NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1365                                Name, F->getParent());
1366 
1367       // The new function may also need remangling.
1368       if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1369         NewFn = *Result;
1370       return true;
1371     }
1372   }
1373 
1374   // Remangle our intrinsic since we upgrade the mangling
1375   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1376   if (Result != std::nullopt) {
1377     NewFn = *Result;
1378     return true;
1379   }
1380 
1381   //  This may not belong here. This function is effectively being overloaded
1382   //  to both detect an intrinsic which needs upgrading, and to provide the
1383   //  upgraded form of the intrinsic. We should perhaps have two separate
1384   //  functions for this.
1385   return false;
1386 }
1387 
1388 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1389   NewFn = nullptr;
1390   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1391   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1392 
1393   // Upgrade intrinsic attributes.  This does not change the function.
1394   if (NewFn)
1395     F = NewFn;
1396   if (Intrinsic::ID id = F->getIntrinsicID())
1397     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1398   return Upgraded;
1399 }
1400 
1401 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1402   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1403                           GV->getName() == "llvm.global_dtors")) ||
1404       !GV->hasInitializer())
1405     return nullptr;
1406   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1407   if (!ATy)
1408     return nullptr;
1409   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1410   if (!STy || STy->getNumElements() != 2)
1411     return nullptr;
1412 
1413   LLVMContext &C = GV->getContext();
1414   IRBuilder<> IRB(C);
1415   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1416                                IRB.getInt8PtrTy());
1417   Constant *Init = GV->getInitializer();
1418   unsigned N = Init->getNumOperands();
1419   std::vector<Constant *> NewCtors(N);
1420   for (unsigned i = 0; i != N; ++i) {
1421     auto Ctor = cast<Constant>(Init->getOperand(i));
1422     NewCtors[i] = ConstantStruct::get(
1423         EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
1424         Constant::getNullValue(IRB.getInt8PtrTy()));
1425   }
1426   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1427 
1428   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1429                             NewInit, GV->getName());
1430 }
1431 
1432 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1433 // to byte shuffles.
1434 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1435                                          Value *Op, unsigned Shift) {
1436   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1437   unsigned NumElts = ResultTy->getNumElements() * 8;
1438 
1439   // Bitcast from a 64-bit element type to a byte element type.
1440   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1441   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1442 
1443   // We'll be shuffling in zeroes.
1444   Value *Res = Constant::getNullValue(VecTy);
1445 
1446   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1447   // we'll just return the zero vector.
1448   if (Shift < 16) {
1449     int Idxs[64];
1450     // 256/512-bit version is split into 2/4 16-byte lanes.
1451     for (unsigned l = 0; l != NumElts; l += 16)
1452       for (unsigned i = 0; i != 16; ++i) {
1453         unsigned Idx = NumElts + i - Shift;
1454         if (Idx < NumElts)
1455           Idx -= NumElts - 16; // end of lane, switch operand.
1456         Idxs[l + i] = Idx + l;
1457       }
1458 
1459     Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1460   }
1461 
1462   // Bitcast back to a 64-bit element type.
1463   return Builder.CreateBitCast(Res, ResultTy, "cast");
1464 }
1465 
1466 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1467 // to byte shuffles.
1468 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1469                                          unsigned Shift) {
1470   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1471   unsigned NumElts = ResultTy->getNumElements() * 8;
1472 
1473   // Bitcast from a 64-bit element type to a byte element type.
1474   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1475   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1476 
1477   // We'll be shuffling in zeroes.
1478   Value *Res = Constant::getNullValue(VecTy);
1479 
1480   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1481   // we'll just return the zero vector.
1482   if (Shift < 16) {
1483     int Idxs[64];
1484     // 256/512-bit version is split into 2/4 16-byte lanes.
1485     for (unsigned l = 0; l != NumElts; l += 16)
1486       for (unsigned i = 0; i != 16; ++i) {
1487         unsigned Idx = i + Shift;
1488         if (Idx >= 16)
1489           Idx += NumElts - 16; // end of lane, switch operand.
1490         Idxs[l + i] = Idx + l;
1491       }
1492 
1493     Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1494   }
1495 
1496   // Bitcast back to a 64-bit element type.
1497   return Builder.CreateBitCast(Res, ResultTy, "cast");
1498 }
1499 
1500 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1501                             unsigned NumElts) {
1502   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1503   llvm::VectorType *MaskTy = FixedVectorType::get(
1504       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1505   Mask = Builder.CreateBitCast(Mask, MaskTy);
1506 
1507   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1508   // i8 and we need to extract down to the right number of elements.
1509   if (NumElts <= 4) {
1510     int Indices[4];
1511     for (unsigned i = 0; i != NumElts; ++i)
1512       Indices[i] = i;
1513     Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1514                                        "extract");
1515   }
1516 
1517   return Mask;
1518 }
1519 
1520 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1521                             Value *Op0, Value *Op1) {
1522   // If the mask is all ones just emit the first operation.
1523   if (const auto *C = dyn_cast<Constant>(Mask))
1524     if (C->isAllOnesValue())
1525       return Op0;
1526 
1527   Mask = getX86MaskVec(Builder, Mask,
1528                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1529   return Builder.CreateSelect(Mask, Op0, Op1);
1530 }
1531 
1532 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1533                                   Value *Op0, Value *Op1) {
1534   // If the mask is all ones just emit the first operation.
1535   if (const auto *C = dyn_cast<Constant>(Mask))
1536     if (C->isAllOnesValue())
1537       return Op0;
1538 
1539   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1540                                       Mask->getType()->getIntegerBitWidth());
1541   Mask = Builder.CreateBitCast(Mask, MaskTy);
1542   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1543   return Builder.CreateSelect(Mask, Op0, Op1);
1544 }
1545 
1546 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1547 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1548 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1549 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1550                                         Value *Op1, Value *Shift,
1551                                         Value *Passthru, Value *Mask,
1552                                         bool IsVALIGN) {
1553   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1554 
1555   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1556   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1557   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1558   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1559 
1560   // Mask the immediate for VALIGN.
1561   if (IsVALIGN)
1562     ShiftVal &= (NumElts - 1);
1563 
1564   // If palignr is shifting the pair of vectors more than the size of two
1565   // lanes, emit zero.
1566   if (ShiftVal >= 32)
1567     return llvm::Constant::getNullValue(Op0->getType());
1568 
1569   // If palignr is shifting the pair of input vectors more than one lane,
1570   // but less than two lanes, convert to shifting in zeroes.
1571   if (ShiftVal > 16) {
1572     ShiftVal -= 16;
1573     Op1 = Op0;
1574     Op0 = llvm::Constant::getNullValue(Op0->getType());
1575   }
1576 
1577   int Indices[64];
1578   // 256-bit palignr operates on 128-bit lanes so we need to handle that
1579   for (unsigned l = 0; l < NumElts; l += 16) {
1580     for (unsigned i = 0; i != 16; ++i) {
1581       unsigned Idx = ShiftVal + i;
1582       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1583         Idx += NumElts - 16; // End of lane, switch operand.
1584       Indices[l + i] = Idx + l;
1585     }
1586   }
1587 
1588   Value *Align = Builder.CreateShuffleVector(
1589       Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1590 
1591   return EmitX86Select(Builder, Mask, Align, Passthru);
1592 }
1593 
1594 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1595                                           bool ZeroMask, bool IndexForm) {
1596   Type *Ty = CI.getType();
1597   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1598   unsigned EltWidth = Ty->getScalarSizeInBits();
1599   bool IsFloat = Ty->isFPOrFPVectorTy();
1600   Intrinsic::ID IID;
1601   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1602     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1603   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1604     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1605   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1606     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1607   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1608     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1609   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1610     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1611   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1612     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1613   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1614     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1615   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1616     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1617   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1618     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1619   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1620     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1621   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1622     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1623   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1624     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1625   else if (VecWidth == 128 && EltWidth == 16)
1626     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1627   else if (VecWidth == 256 && EltWidth == 16)
1628     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1629   else if (VecWidth == 512 && EltWidth == 16)
1630     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1631   else if (VecWidth == 128 && EltWidth == 8)
1632     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1633   else if (VecWidth == 256 && EltWidth == 8)
1634     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1635   else if (VecWidth == 512 && EltWidth == 8)
1636     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1637   else
1638     llvm_unreachable("Unexpected intrinsic");
1639 
1640   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1641                     CI.getArgOperand(2) };
1642 
1643   // If this isn't index form we need to swap operand 0 and 1.
1644   if (!IndexForm)
1645     std::swap(Args[0], Args[1]);
1646 
1647   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1648                                 Args);
1649   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1650                              : Builder.CreateBitCast(CI.getArgOperand(1),
1651                                                      Ty);
1652   return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1653 }
1654 
1655 static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1656                                          Intrinsic::ID IID) {
1657   Type *Ty = CI.getType();
1658   Value *Op0 = CI.getOperand(0);
1659   Value *Op1 = CI.getOperand(1);
1660   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1661   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1662 
1663   if (CI.arg_size() == 4) { // For masked intrinsics.
1664     Value *VecSrc = CI.getOperand(2);
1665     Value *Mask = CI.getOperand(3);
1666     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1667   }
1668   return Res;
1669 }
1670 
1671 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1672                                bool IsRotateRight) {
1673   Type *Ty = CI.getType();
1674   Value *Src = CI.getArgOperand(0);
1675   Value *Amt = CI.getArgOperand(1);
1676 
1677   // Amount may be scalar immediate, in which case create a splat vector.
1678   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1679   // we only care about the lowest log2 bits anyway.
1680   if (Amt->getType() != Ty) {
1681     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1682     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1683     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1684   }
1685 
1686   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1687   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1688   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1689 
1690   if (CI.arg_size() == 4) { // For masked intrinsics.
1691     Value *VecSrc = CI.getOperand(2);
1692     Value *Mask = CI.getOperand(3);
1693     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1694   }
1695   return Res;
1696 }
1697 
1698 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1699                               bool IsSigned) {
1700   Type *Ty = CI.getType();
1701   Value *LHS = CI.getArgOperand(0);
1702   Value *RHS = CI.getArgOperand(1);
1703 
1704   CmpInst::Predicate Pred;
1705   switch (Imm) {
1706   case 0x0:
1707     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1708     break;
1709   case 0x1:
1710     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1711     break;
1712   case 0x2:
1713     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1714     break;
1715   case 0x3:
1716     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1717     break;
1718   case 0x4:
1719     Pred = ICmpInst::ICMP_EQ;
1720     break;
1721   case 0x5:
1722     Pred = ICmpInst::ICMP_NE;
1723     break;
1724   case 0x6:
1725     return Constant::getNullValue(Ty); // FALSE
1726   case 0x7:
1727     return Constant::getAllOnesValue(Ty); // TRUE
1728   default:
1729     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1730   }
1731 
1732   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1733   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1734   return Ext;
1735 }
1736 
1737 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1738                                     bool IsShiftRight, bool ZeroMask) {
1739   Type *Ty = CI.getType();
1740   Value *Op0 = CI.getArgOperand(0);
1741   Value *Op1 = CI.getArgOperand(1);
1742   Value *Amt = CI.getArgOperand(2);
1743 
1744   if (IsShiftRight)
1745     std::swap(Op0, Op1);
1746 
1747   // Amount may be scalar immediate, in which case create a splat vector.
1748   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1749   // we only care about the lowest log2 bits anyway.
1750   if (Amt->getType() != Ty) {
1751     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1752     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1753     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1754   }
1755 
1756   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1757   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1758   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1759 
1760   unsigned NumArgs = CI.arg_size();
1761   if (NumArgs >= 4) { // For masked intrinsics.
1762     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1763                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1764                                    CI.getArgOperand(0);
1765     Value *Mask = CI.getOperand(NumArgs - 1);
1766     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1767   }
1768   return Res;
1769 }
1770 
1771 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1772                                  Value *Ptr, Value *Data, Value *Mask,
1773                                  bool Aligned) {
1774   // Cast the pointer to the right type.
1775   Ptr = Builder.CreateBitCast(Ptr,
1776                               llvm::PointerType::getUnqual(Data->getType()));
1777   const Align Alignment =
1778       Aligned
1779           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1780           : Align(1);
1781 
1782   // If the mask is all ones just emit a regular store.
1783   if (const auto *C = dyn_cast<Constant>(Mask))
1784     if (C->isAllOnesValue())
1785       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1786 
1787   // Convert the mask from an integer type to a vector of i1.
1788   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1789   Mask = getX86MaskVec(Builder, Mask, NumElts);
1790   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1791 }
1792 
1793 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1794                                 Value *Ptr, Value *Passthru, Value *Mask,
1795                                 bool Aligned) {
1796   Type *ValTy = Passthru->getType();
1797   // Cast the pointer to the right type.
1798   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1799   const Align Alignment =
1800       Aligned
1801           ? Align(
1802                 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1803                 8)
1804           : Align(1);
1805 
1806   // If the mask is all ones just emit a regular store.
1807   if (const auto *C = dyn_cast<Constant>(Mask))
1808     if (C->isAllOnesValue())
1809       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1810 
1811   // Convert the mask from an integer type to a vector of i1.
1812   unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1813   Mask = getX86MaskVec(Builder, Mask, NumElts);
1814   return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1815 }
1816 
1817 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1818   Type *Ty = CI.getType();
1819   Value *Op0 = CI.getArgOperand(0);
1820   Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1821   Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1822   if (CI.arg_size() == 3)
1823     Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1824   return Res;
1825 }
1826 
1827 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1828   Type *Ty = CI.getType();
1829 
1830   // Arguments have a vXi32 type so cast to vXi64.
1831   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1832   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1833 
1834   if (IsSigned) {
1835     // Shift left then arithmetic shift right.
1836     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1837     LHS = Builder.CreateShl(LHS, ShiftAmt);
1838     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1839     RHS = Builder.CreateShl(RHS, ShiftAmt);
1840     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1841   } else {
1842     // Clear the upper bits.
1843     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1844     LHS = Builder.CreateAnd(LHS, Mask);
1845     RHS = Builder.CreateAnd(RHS, Mask);
1846   }
1847 
1848   Value *Res = Builder.CreateMul(LHS, RHS);
1849 
1850   if (CI.arg_size() == 4)
1851     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1852 
1853   return Res;
1854 }
1855 
1856 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1857 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1858                                      Value *Mask) {
1859   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1860   if (Mask) {
1861     const auto *C = dyn_cast<Constant>(Mask);
1862     if (!C || !C->isAllOnesValue())
1863       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1864   }
1865 
1866   if (NumElts < 8) {
1867     int Indices[8];
1868     for (unsigned i = 0; i != NumElts; ++i)
1869       Indices[i] = i;
1870     for (unsigned i = NumElts; i != 8; ++i)
1871       Indices[i] = NumElts + i % NumElts;
1872     Vec = Builder.CreateShuffleVector(Vec,
1873                                       Constant::getNullValue(Vec->getType()),
1874                                       Indices);
1875   }
1876   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1877 }
1878 
1879 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1880                                    unsigned CC, bool Signed) {
1881   Value *Op0 = CI.getArgOperand(0);
1882   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1883 
1884   Value *Cmp;
1885   if (CC == 3) {
1886     Cmp = Constant::getNullValue(
1887         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1888   } else if (CC == 7) {
1889     Cmp = Constant::getAllOnesValue(
1890         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1891   } else {
1892     ICmpInst::Predicate Pred;
1893     switch (CC) {
1894     default: llvm_unreachable("Unknown condition code");
1895     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1896     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1897     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1898     case 4: Pred = ICmpInst::ICMP_NE;  break;
1899     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1900     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1901     }
1902     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1903   }
1904 
1905   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1906 
1907   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1908 }
1909 
1910 // Replace a masked intrinsic with an older unmasked intrinsic.
1911 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1912                                     Intrinsic::ID IID) {
1913   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1914   Value *Rep = Builder.CreateCall(Intrin,
1915                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1916   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1917 }
1918 
1919 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1920   Value* A = CI.getArgOperand(0);
1921   Value* B = CI.getArgOperand(1);
1922   Value* Src = CI.getArgOperand(2);
1923   Value* Mask = CI.getArgOperand(3);
1924 
1925   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1926   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1927   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1928   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1929   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1930   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1931 }
1932 
1933 
1934 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1935   Value* Op = CI.getArgOperand(0);
1936   Type* ReturnOp = CI.getType();
1937   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1938   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1939   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1940 }
1941 
1942 // Replace intrinsic with unmasked version and a select.
1943 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1944                                       CallBase &CI, Value *&Rep) {
1945   Name = Name.substr(12); // Remove avx512.mask.
1946 
1947   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1948   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1949   Intrinsic::ID IID;
1950   if (Name.startswith("max.p")) {
1951     if (VecWidth == 128 && EltWidth == 32)
1952       IID = Intrinsic::x86_sse_max_ps;
1953     else if (VecWidth == 128 && EltWidth == 64)
1954       IID = Intrinsic::x86_sse2_max_pd;
1955     else if (VecWidth == 256 && EltWidth == 32)
1956       IID = Intrinsic::x86_avx_max_ps_256;
1957     else if (VecWidth == 256 && EltWidth == 64)
1958       IID = Intrinsic::x86_avx_max_pd_256;
1959     else
1960       llvm_unreachable("Unexpected intrinsic");
1961   } else if (Name.startswith("min.p")) {
1962     if (VecWidth == 128 && EltWidth == 32)
1963       IID = Intrinsic::x86_sse_min_ps;
1964     else if (VecWidth == 128 && EltWidth == 64)
1965       IID = Intrinsic::x86_sse2_min_pd;
1966     else if (VecWidth == 256 && EltWidth == 32)
1967       IID = Intrinsic::x86_avx_min_ps_256;
1968     else if (VecWidth == 256 && EltWidth == 64)
1969       IID = Intrinsic::x86_avx_min_pd_256;
1970     else
1971       llvm_unreachable("Unexpected intrinsic");
1972   } else if (Name.startswith("pshuf.b.")) {
1973     if (VecWidth == 128)
1974       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1975     else if (VecWidth == 256)
1976       IID = Intrinsic::x86_avx2_pshuf_b;
1977     else if (VecWidth == 512)
1978       IID = Intrinsic::x86_avx512_pshuf_b_512;
1979     else
1980       llvm_unreachable("Unexpected intrinsic");
1981   } else if (Name.startswith("pmul.hr.sw.")) {
1982     if (VecWidth == 128)
1983       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1984     else if (VecWidth == 256)
1985       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1986     else if (VecWidth == 512)
1987       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1988     else
1989       llvm_unreachable("Unexpected intrinsic");
1990   } else if (Name.startswith("pmulh.w.")) {
1991     if (VecWidth == 128)
1992       IID = Intrinsic::x86_sse2_pmulh_w;
1993     else if (VecWidth == 256)
1994       IID = Intrinsic::x86_avx2_pmulh_w;
1995     else if (VecWidth == 512)
1996       IID = Intrinsic::x86_avx512_pmulh_w_512;
1997     else
1998       llvm_unreachable("Unexpected intrinsic");
1999   } else if (Name.startswith("pmulhu.w.")) {
2000     if (VecWidth == 128)
2001       IID = Intrinsic::x86_sse2_pmulhu_w;
2002     else if (VecWidth == 256)
2003       IID = Intrinsic::x86_avx2_pmulhu_w;
2004     else if (VecWidth == 512)
2005       IID = Intrinsic::x86_avx512_pmulhu_w_512;
2006     else
2007       llvm_unreachable("Unexpected intrinsic");
2008   } else if (Name.startswith("pmaddw.d.")) {
2009     if (VecWidth == 128)
2010       IID = Intrinsic::x86_sse2_pmadd_wd;
2011     else if (VecWidth == 256)
2012       IID = Intrinsic::x86_avx2_pmadd_wd;
2013     else if (VecWidth == 512)
2014       IID = Intrinsic::x86_avx512_pmaddw_d_512;
2015     else
2016       llvm_unreachable("Unexpected intrinsic");
2017   } else if (Name.startswith("pmaddubs.w.")) {
2018     if (VecWidth == 128)
2019       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2020     else if (VecWidth == 256)
2021       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2022     else if (VecWidth == 512)
2023       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2024     else
2025       llvm_unreachable("Unexpected intrinsic");
2026   } else if (Name.startswith("packsswb.")) {
2027     if (VecWidth == 128)
2028       IID = Intrinsic::x86_sse2_packsswb_128;
2029     else if (VecWidth == 256)
2030       IID = Intrinsic::x86_avx2_packsswb;
2031     else if (VecWidth == 512)
2032       IID = Intrinsic::x86_avx512_packsswb_512;
2033     else
2034       llvm_unreachable("Unexpected intrinsic");
2035   } else if (Name.startswith("packssdw.")) {
2036     if (VecWidth == 128)
2037       IID = Intrinsic::x86_sse2_packssdw_128;
2038     else if (VecWidth == 256)
2039       IID = Intrinsic::x86_avx2_packssdw;
2040     else if (VecWidth == 512)
2041       IID = Intrinsic::x86_avx512_packssdw_512;
2042     else
2043       llvm_unreachable("Unexpected intrinsic");
2044   } else if (Name.startswith("packuswb.")) {
2045     if (VecWidth == 128)
2046       IID = Intrinsic::x86_sse2_packuswb_128;
2047     else if (VecWidth == 256)
2048       IID = Intrinsic::x86_avx2_packuswb;
2049     else if (VecWidth == 512)
2050       IID = Intrinsic::x86_avx512_packuswb_512;
2051     else
2052       llvm_unreachable("Unexpected intrinsic");
2053   } else if (Name.startswith("packusdw.")) {
2054     if (VecWidth == 128)
2055       IID = Intrinsic::x86_sse41_packusdw;
2056     else if (VecWidth == 256)
2057       IID = Intrinsic::x86_avx2_packusdw;
2058     else if (VecWidth == 512)
2059       IID = Intrinsic::x86_avx512_packusdw_512;
2060     else
2061       llvm_unreachable("Unexpected intrinsic");
2062   } else if (Name.startswith("vpermilvar.")) {
2063     if (VecWidth == 128 && EltWidth == 32)
2064       IID = Intrinsic::x86_avx_vpermilvar_ps;
2065     else if (VecWidth == 128 && EltWidth == 64)
2066       IID = Intrinsic::x86_avx_vpermilvar_pd;
2067     else if (VecWidth == 256 && EltWidth == 32)
2068       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2069     else if (VecWidth == 256 && EltWidth == 64)
2070       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2071     else if (VecWidth == 512 && EltWidth == 32)
2072       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2073     else if (VecWidth == 512 && EltWidth == 64)
2074       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2075     else
2076       llvm_unreachable("Unexpected intrinsic");
2077   } else if (Name == "cvtpd2dq.256") {
2078     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2079   } else if (Name == "cvtpd2ps.256") {
2080     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2081   } else if (Name == "cvttpd2dq.256") {
2082     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2083   } else if (Name == "cvttps2dq.128") {
2084     IID = Intrinsic::x86_sse2_cvttps2dq;
2085   } else if (Name == "cvttps2dq.256") {
2086     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2087   } else if (Name.startswith("permvar.")) {
2088     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2089     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2090       IID = Intrinsic::x86_avx2_permps;
2091     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2092       IID = Intrinsic::x86_avx2_permd;
2093     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2094       IID = Intrinsic::x86_avx512_permvar_df_256;
2095     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2096       IID = Intrinsic::x86_avx512_permvar_di_256;
2097     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2098       IID = Intrinsic::x86_avx512_permvar_sf_512;
2099     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2100       IID = Intrinsic::x86_avx512_permvar_si_512;
2101     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2102       IID = Intrinsic::x86_avx512_permvar_df_512;
2103     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2104       IID = Intrinsic::x86_avx512_permvar_di_512;
2105     else if (VecWidth == 128 && EltWidth == 16)
2106       IID = Intrinsic::x86_avx512_permvar_hi_128;
2107     else if (VecWidth == 256 && EltWidth == 16)
2108       IID = Intrinsic::x86_avx512_permvar_hi_256;
2109     else if (VecWidth == 512 && EltWidth == 16)
2110       IID = Intrinsic::x86_avx512_permvar_hi_512;
2111     else if (VecWidth == 128 && EltWidth == 8)
2112       IID = Intrinsic::x86_avx512_permvar_qi_128;
2113     else if (VecWidth == 256 && EltWidth == 8)
2114       IID = Intrinsic::x86_avx512_permvar_qi_256;
2115     else if (VecWidth == 512 && EltWidth == 8)
2116       IID = Intrinsic::x86_avx512_permvar_qi_512;
2117     else
2118       llvm_unreachable("Unexpected intrinsic");
2119   } else if (Name.startswith("dbpsadbw.")) {
2120     if (VecWidth == 128)
2121       IID = Intrinsic::x86_avx512_dbpsadbw_128;
2122     else if (VecWidth == 256)
2123       IID = Intrinsic::x86_avx512_dbpsadbw_256;
2124     else if (VecWidth == 512)
2125       IID = Intrinsic::x86_avx512_dbpsadbw_512;
2126     else
2127       llvm_unreachable("Unexpected intrinsic");
2128   } else if (Name.startswith("pmultishift.qb.")) {
2129     if (VecWidth == 128)
2130       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2131     else if (VecWidth == 256)
2132       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2133     else if (VecWidth == 512)
2134       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2135     else
2136       llvm_unreachable("Unexpected intrinsic");
2137   } else if (Name.startswith("conflict.")) {
2138     if (Name[9] == 'd' && VecWidth == 128)
2139       IID = Intrinsic::x86_avx512_conflict_d_128;
2140     else if (Name[9] == 'd' && VecWidth == 256)
2141       IID = Intrinsic::x86_avx512_conflict_d_256;
2142     else if (Name[9] == 'd' && VecWidth == 512)
2143       IID = Intrinsic::x86_avx512_conflict_d_512;
2144     else if (Name[9] == 'q' && VecWidth == 128)
2145       IID = Intrinsic::x86_avx512_conflict_q_128;
2146     else if (Name[9] == 'q' && VecWidth == 256)
2147       IID = Intrinsic::x86_avx512_conflict_q_256;
2148     else if (Name[9] == 'q' && VecWidth == 512)
2149       IID = Intrinsic::x86_avx512_conflict_q_512;
2150     else
2151       llvm_unreachable("Unexpected intrinsic");
2152   } else if (Name.startswith("pavg.")) {
2153     if (Name[5] == 'b' && VecWidth == 128)
2154       IID = Intrinsic::x86_sse2_pavg_b;
2155     else if (Name[5] == 'b' && VecWidth == 256)
2156       IID = Intrinsic::x86_avx2_pavg_b;
2157     else if (Name[5] == 'b' && VecWidth == 512)
2158       IID = Intrinsic::x86_avx512_pavg_b_512;
2159     else if (Name[5] == 'w' && VecWidth == 128)
2160       IID = Intrinsic::x86_sse2_pavg_w;
2161     else if (Name[5] == 'w' && VecWidth == 256)
2162       IID = Intrinsic::x86_avx2_pavg_w;
2163     else if (Name[5] == 'w' && VecWidth == 512)
2164       IID = Intrinsic::x86_avx512_pavg_w_512;
2165     else
2166       llvm_unreachable("Unexpected intrinsic");
2167   } else
2168     return false;
2169 
2170   SmallVector<Value *, 4> Args(CI.args());
2171   Args.pop_back();
2172   Args.pop_back();
2173   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2174                            Args);
2175   unsigned NumArgs = CI.arg_size();
2176   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2177                       CI.getArgOperand(NumArgs - 2));
2178   return true;
2179 }
2180 
/// Upgrade the comment in an inline asm call that represents an objc
/// retain/release marker.
///
/// The string is rewritten only when all three conditions hold: it begins
/// with "mov\tfp", it mentions objc_retainAutoreleaseReturnValue, and it
/// contains "# marker". In that case the '#' opening the first "# marker"
/// occurrence becomes ';'. Every other string is left untouched.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  std::string &Asm = *AsmStr;

  // Only strings that open with this exact prefix are candidates.
  static const char Prefix[] = "mov\tfp";
  if (Asm.compare(0, sizeof(Prefix) - 1, Prefix) != 0)
    return;

  if (Asm.find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t MarkerPos = Asm.find("# marker");
  if (MarkerPos == std::string::npos)
    return;

  // Swap the single '#' character for ';', leaving the rest of the marker.
  Asm.replace(MarkerPos, 1, ";");
}
2191 
2192 static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2193                                       IRBuilder<> &Builder) {
2194   if (Name == "mve.vctp64.old") {
2195     // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2196     // correct type.
2197     Value *VCTP = Builder.CreateCall(
2198         Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
2199         CI->getArgOperand(0), CI->getName());
2200     Value *C1 = Builder.CreateCall(
2201         Intrinsic::getDeclaration(
2202             F->getParent(), Intrinsic::arm_mve_pred_v2i,
2203             {VectorType::get(Builder.getInt1Ty(), 2, false)}),
2204         VCTP);
2205     return Builder.CreateCall(
2206         Intrinsic::getDeclaration(
2207             F->getParent(), Intrinsic::arm_mve_pred_i2v,
2208             {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2209         C1);
2210   } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2211              Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2212              Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2213              Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2214              Name ==
2215                  "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2216              Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2217              Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2218              Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2219              Name ==
2220                  "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2221              Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2222              Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
2223              Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
2224              Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
2225              Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
2226              Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
2227              Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
2228     std::vector<Type *> Tys;
2229     unsigned ID = CI->getIntrinsicID();
2230     Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
2231     switch (ID) {
2232     case Intrinsic::arm_mve_mull_int_predicated:
2233     case Intrinsic::arm_mve_vqdmull_predicated:
2234     case Intrinsic::arm_mve_vldr_gather_base_predicated:
2235       Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
2236       break;
2237     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
2238     case Intrinsic::arm_mve_vstr_scatter_base_predicated:
2239     case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
2240       Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
2241              V2I1Ty};
2242       break;
2243     case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2244       Tys = {CI->getType(), CI->getOperand(0)->getType(),
2245              CI->getOperand(1)->getType(), V2I1Ty};
2246       break;
2247     case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2248       Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
2249              CI->getOperand(2)->getType(), V2I1Ty};
2250       break;
2251     case Intrinsic::arm_cde_vcx1q_predicated:
2252     case Intrinsic::arm_cde_vcx1qa_predicated:
2253     case Intrinsic::arm_cde_vcx2q_predicated:
2254     case Intrinsic::arm_cde_vcx2qa_predicated:
2255     case Intrinsic::arm_cde_vcx3q_predicated:
2256     case Intrinsic::arm_cde_vcx3qa_predicated:
2257       Tys = {CI->getOperand(1)->getType(), V2I1Ty};
2258       break;
2259     default:
2260       llvm_unreachable("Unhandled Intrinsic!");
2261     }
2262 
2263     std::vector<Value *> Ops;
2264     for (Value *Op : CI->args()) {
2265       Type *Ty = Op->getType();
2266       if (Ty->getScalarSizeInBits() == 1) {
2267         Value *C1 = Builder.CreateCall(
2268             Intrinsic::getDeclaration(
2269                 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2270                 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2271             Op);
2272         Op = Builder.CreateCall(
2273             Intrinsic::getDeclaration(F->getParent(),
2274                                       Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2275             C1);
2276       }
2277       Ops.push_back(Op);
2278     }
2279 
2280     Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
2281     return Builder.CreateCall(Fn, Ops, CI->getName());
2282   }
2283   llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2284 }
2285 
2286 static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
2287                                          Function *F, IRBuilder<> &Builder) {
2288   const bool IsInc = Name.startswith("atomic.inc.");
2289   if (IsInc || Name.startswith("atomic.dec.")) {
2290     if (CI->getNumOperands() != 6) // Malformed bitcode.
2291       return nullptr;
2292 
2293     AtomicRMWInst::BinOp RMWOp =
2294         IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2295 
2296     Value *Ptr = CI->getArgOperand(0);
2297     Value *Val = CI->getArgOperand(1);
2298     ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2299     ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2300 
2301     AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2302     if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2303       Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2304     if (Order == AtomicOrdering::NotAtomic ||
2305         Order == AtomicOrdering::Unordered)
2306       Order = AtomicOrdering::SequentiallyConsistent;
2307 
2308     AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order);
2309 
2310     if (!VolatileArg || !VolatileArg->isZero())
2311       RMW->setVolatile(true);
2312     return RMW;
2313   }
2314 
2315   llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2316 }
2317 
2318 /// Upgrade a call to an old intrinsic. All argument and return casting must be
2319 /// provided to seamlessly integrate with existing context.
2320 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2321   // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2322   // checks the callee's function type matches. It's likely we need to handle
2323   // type changes here.
2324   Function *F = dyn_cast<Function>(CI->getCalledOperand());
2325   if (!F)
2326     return;
2327 
2328   LLVMContext &C = CI->getContext();
2329   IRBuilder<> Builder(C);
2330   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2331 
2332   if (!NewFn) {
2333     // Get the Function's name.
2334     StringRef Name = F->getName();
2335 
2336     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2337     Name = Name.substr(5);
2338 
2339     bool IsX86 = Name.startswith("x86.");
2340     if (IsX86)
2341       Name = Name.substr(4);
2342     bool IsNVVM = Name.startswith("nvvm.");
2343     if (IsNVVM)
2344       Name = Name.substr(5);
2345     bool IsARM = Name.startswith("arm.");
2346     if (IsARM)
2347       Name = Name.substr(4);
2348     bool IsAMDGCN = Name.startswith("amdgcn.");
2349     if (IsAMDGCN)
2350       Name = Name.substr(7);
2351 
2352     if (IsX86 && Name.startswith("sse4a.movnt.")) {
2353       SmallVector<Metadata *, 1> Elts;
2354       Elts.push_back(
2355           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2356       MDNode *Node = MDNode::get(C, Elts);
2357 
2358       Value *Arg0 = CI->getArgOperand(0);
2359       Value *Arg1 = CI->getArgOperand(1);
2360 
2361       // Nontemporal (unaligned) store of the 0'th element of the float/double
2362       // vector.
2363       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2364       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2365       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2366       Value *Extract =
2367           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2368 
2369       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2370       SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2371 
2372       // Remove intrinsic.
2373       CI->eraseFromParent();
2374       return;
2375     }
2376 
2377     if (IsX86 && (Name.startswith("avx.movnt.") ||
2378                   Name.startswith("avx512.storent."))) {
2379       SmallVector<Metadata *, 1> Elts;
2380       Elts.push_back(
2381           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2382       MDNode *Node = MDNode::get(C, Elts);
2383 
2384       Value *Arg0 = CI->getArgOperand(0);
2385       Value *Arg1 = CI->getArgOperand(1);
2386 
2387       // Convert the type of the pointer to a pointer to the stored type.
2388       Value *BC = Builder.CreateBitCast(Arg0,
2389                                         PointerType::getUnqual(Arg1->getType()),
2390                                         "cast");
2391       StoreInst *SI = Builder.CreateAlignedStore(
2392           Arg1, BC,
2393           Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2394       SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2395 
2396       // Remove intrinsic.
2397       CI->eraseFromParent();
2398       return;
2399     }
2400 
2401     if (IsX86 && Name == "sse2.storel.dq") {
2402       Value *Arg0 = CI->getArgOperand(0);
2403       Value *Arg1 = CI->getArgOperand(1);
2404 
2405       auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2406       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2407       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2408       Value *BC = Builder.CreateBitCast(Arg0,
2409                                         PointerType::getUnqual(Elt->getType()),
2410                                         "cast");
2411       Builder.CreateAlignedStore(Elt, BC, Align(1));
2412 
2413       // Remove intrinsic.
2414       CI->eraseFromParent();
2415       return;
2416     }
2417 
2418     if (IsX86 && (Name.startswith("sse.storeu.") ||
2419                   Name.startswith("sse2.storeu.") ||
2420                   Name.startswith("avx.storeu."))) {
2421       Value *Arg0 = CI->getArgOperand(0);
2422       Value *Arg1 = CI->getArgOperand(1);
2423 
2424       Arg0 = Builder.CreateBitCast(Arg0,
2425                                    PointerType::getUnqual(Arg1->getType()),
2426                                    "cast");
2427       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2428 
2429       // Remove intrinsic.
2430       CI->eraseFromParent();
2431       return;
2432     }
2433 
2434     if (IsX86 && Name == "avx512.mask.store.ss") {
2435       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2436       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2437                          Mask, false);
2438 
2439       // Remove intrinsic.
2440       CI->eraseFromParent();
2441       return;
2442     }
2443 
2444     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2445       // "avx512.mask.storeu." or "avx512.mask.store."
2446       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2447       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2448                          CI->getArgOperand(2), Aligned);
2449 
2450       // Remove intrinsic.
2451       CI->eraseFromParent();
2452       return;
2453     }
2454 
2455     Value *Rep;
2456     // Upgrade packed integer vector compare intrinsics to compare instructions.
2457     if (IsX86 && (Name.startswith("sse2.pcmp") ||
2458                   Name.startswith("avx2.pcmp"))) {
2459       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2460       bool CmpEq = Name[9] == 'e';
2461       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2462                                CI->getArgOperand(0), CI->getArgOperand(1));
2463       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2464     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2465       Type *ExtTy = Type::getInt32Ty(C);
2466       if (CI->getOperand(0)->getType()->isIntegerTy(8))
2467         ExtTy = Type::getInt64Ty(C);
2468       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2469                          ExtTy->getPrimitiveSizeInBits();
2470       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2471       Rep = Builder.CreateVectorSplat(NumElts, Rep);
2472     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2473                          Name == "sse2.sqrt.sd")) {
2474       Value *Vec = CI->getArgOperand(0);
2475       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2476       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2477                                                  Intrinsic::sqrt, Elt0->getType());
2478       Elt0 = Builder.CreateCall(Intr, Elt0);
2479       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2480     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2481                          Name.startswith("sse2.sqrt.p") ||
2482                          Name.startswith("sse.sqrt.p"))) {
2483       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2484                                                          Intrinsic::sqrt,
2485                                                          CI->getType()),
2486                                {CI->getArgOperand(0)});
2487     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2488       if (CI->arg_size() == 4 &&
2489           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2490            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2491         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2492                                             : Intrinsic::x86_avx512_sqrt_pd_512;
2493 
2494         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2495         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2496                                                            IID), Args);
2497       } else {
2498         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2499                                                            Intrinsic::sqrt,
2500                                                            CI->getType()),
2501                                  {CI->getArgOperand(0)});
2502       }
2503       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2504                           CI->getArgOperand(1));
2505     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2506                          Name.startswith("avx512.ptestnm"))) {
2507       Value *Op0 = CI->getArgOperand(0);
2508       Value *Op1 = CI->getArgOperand(1);
2509       Value *Mask = CI->getArgOperand(2);
2510       Rep = Builder.CreateAnd(Op0, Op1);
2511       llvm::Type *Ty = Op0->getType();
2512       Value *Zero = llvm::Constant::getNullValue(Ty);
2513       ICmpInst::Predicate Pred =
2514         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2515       Rep = Builder.CreateICmp(Pred, Rep, Zero);
2516       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2517     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2518       unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2519                              ->getNumElements();
2520       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2521       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2522                           CI->getArgOperand(1));
2523     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2524       unsigned NumElts = CI->getType()->getScalarSizeInBits();
2525       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2526       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2527       int Indices[64];
2528       for (unsigned i = 0; i != NumElts; ++i)
2529         Indices[i] = i;
2530 
2531       // First extract half of each vector. This gives better codegen than
2532       // doing it in a single shuffle.
2533       LHS =
2534           Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2535       RHS =
2536           Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2537       // Concat the vectors.
2538       // NOTE: Operands have to be swapped to match intrinsic definition.
2539       Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2540       Rep = Builder.CreateBitCast(Rep, CI->getType());
2541     } else if (IsX86 && Name == "avx512.kand.w") {
2542       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2543       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2544       Rep = Builder.CreateAnd(LHS, RHS);
2545       Rep = Builder.CreateBitCast(Rep, CI->getType());
2546     } else if (IsX86 && Name == "avx512.kandn.w") {
2547       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2548       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2549       LHS = Builder.CreateNot(LHS);
2550       Rep = Builder.CreateAnd(LHS, RHS);
2551       Rep = Builder.CreateBitCast(Rep, CI->getType());
2552     } else if (IsX86 && Name == "avx512.kor.w") {
2553       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2554       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2555       Rep = Builder.CreateOr(LHS, RHS);
2556       Rep = Builder.CreateBitCast(Rep, CI->getType());
2557     } else if (IsX86 && Name == "avx512.kxor.w") {
2558       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2559       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2560       Rep = Builder.CreateXor(LHS, RHS);
2561       Rep = Builder.CreateBitCast(Rep, CI->getType());
2562     } else if (IsX86 && Name == "avx512.kxnor.w") {
2563       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2564       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2565       LHS = Builder.CreateNot(LHS);
2566       Rep = Builder.CreateXor(LHS, RHS);
2567       Rep = Builder.CreateBitCast(Rep, CI->getType());
2568     } else if (IsX86 && Name == "avx512.knot.w") {
2569       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2570       Rep = Builder.CreateNot(Rep);
2571       Rep = Builder.CreateBitCast(Rep, CI->getType());
2572     } else if (IsX86 &&
2573                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2574       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2575       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2576       Rep = Builder.CreateOr(LHS, RHS);
2577       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2578       Value *C;
2579       if (Name[14] == 'c')
2580         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2581       else
2582         C = ConstantInt::getNullValue(Builder.getInt16Ty());
2583       Rep = Builder.CreateICmpEQ(Rep, C);
2584       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2585     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2586                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2587                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2588                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2589       Type *I32Ty = Type::getInt32Ty(C);
2590       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2591                                                  ConstantInt::get(I32Ty, 0));
2592       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2593                                                  ConstantInt::get(I32Ty, 0));
2594       Value *EltOp;
2595       if (Name.contains(".add."))
2596         EltOp = Builder.CreateFAdd(Elt0, Elt1);
2597       else if (Name.contains(".sub."))
2598         EltOp = Builder.CreateFSub(Elt0, Elt1);
2599       else if (Name.contains(".mul."))
2600         EltOp = Builder.CreateFMul(Elt0, Elt1);
2601       else
2602         EltOp = Builder.CreateFDiv(Elt0, Elt1);
2603       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2604                                         ConstantInt::get(I32Ty, 0));
2605     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2606       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2607       bool CmpEq = Name[16] == 'e';
2608       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2609     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2610       Type *OpTy = CI->getArgOperand(0)->getType();
2611       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2612       Intrinsic::ID IID;
2613       switch (VecWidth) {
2614       default: llvm_unreachable("Unexpected intrinsic");
2615       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2616       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2617       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2618       }
2619 
2620       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2621                                { CI->getOperand(0), CI->getArgOperand(1) });
2622       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2623     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2624       Type *OpTy = CI->getArgOperand(0)->getType();
2625       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2626       unsigned EltWidth = OpTy->getScalarSizeInBits();
2627       Intrinsic::ID IID;
2628       if (VecWidth == 128 && EltWidth == 32)
2629         IID = Intrinsic::x86_avx512_fpclass_ps_128;
2630       else if (VecWidth == 256 && EltWidth == 32)
2631         IID = Intrinsic::x86_avx512_fpclass_ps_256;
2632       else if (VecWidth == 512 && EltWidth == 32)
2633         IID = Intrinsic::x86_avx512_fpclass_ps_512;
2634       else if (VecWidth == 128 && EltWidth == 64)
2635         IID = Intrinsic::x86_avx512_fpclass_pd_128;
2636       else if (VecWidth == 256 && EltWidth == 64)
2637         IID = Intrinsic::x86_avx512_fpclass_pd_256;
2638       else if (VecWidth == 512 && EltWidth == 64)
2639         IID = Intrinsic::x86_avx512_fpclass_pd_512;
2640       else
2641         llvm_unreachable("Unexpected intrinsic");
2642 
2643       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2644                                { CI->getOperand(0), CI->getArgOperand(1) });
2645       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2646     } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2647       SmallVector<Value *, 4> Args(CI->args());
2648       Type *OpTy = Args[0]->getType();
2649       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2650       unsigned EltWidth = OpTy->getScalarSizeInBits();
2651       Intrinsic::ID IID;
2652       if (VecWidth == 128 && EltWidth == 32)
2653         IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2654       else if (VecWidth == 256 && EltWidth == 32)
2655         IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2656       else if (VecWidth == 512 && EltWidth == 32)
2657         IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2658       else if (VecWidth == 128 && EltWidth == 64)
2659         IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2660       else if (VecWidth == 256 && EltWidth == 64)
2661         IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2662       else if (VecWidth == 512 && EltWidth == 64)
2663         IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2664       else
2665         llvm_unreachable("Unexpected intrinsic");
2666 
2667       Value *Mask = Constant::getAllOnesValue(CI->getType());
2668       if (VecWidth == 512)
2669         std::swap(Mask, Args.back());
2670       Args.push_back(Mask);
2671 
2672       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2673                                Args);
2674     } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2675       // Integer compare intrinsics.
2676       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2677       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2678     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2679       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2680       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2681     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2682                          Name.startswith("avx512.cvtw2mask.") ||
2683                          Name.startswith("avx512.cvtd2mask.") ||
2684                          Name.startswith("avx512.cvtq2mask."))) {
2685       Value *Op = CI->getArgOperand(0);
2686       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2687       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2688       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2689     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2690                         Name == "ssse3.pabs.w.128" ||
2691                         Name == "ssse3.pabs.d.128" ||
2692                         Name.startswith("avx2.pabs") ||
2693                         Name.startswith("avx512.mask.pabs"))) {
2694       Rep = upgradeAbs(Builder, *CI);
2695     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2696                          Name == "sse2.pmaxs.w" ||
2697                          Name == "sse41.pmaxsd" ||
2698                          Name.startswith("avx2.pmaxs") ||
2699                          Name.startswith("avx512.mask.pmaxs"))) {
2700       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2701     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2702                          Name == "sse41.pmaxuw" ||
2703                          Name == "sse41.pmaxud" ||
2704                          Name.startswith("avx2.pmaxu") ||
2705                          Name.startswith("avx512.mask.pmaxu"))) {
2706       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2707     } else if (IsX86 && (Name == "sse41.pminsb" ||
2708                          Name == "sse2.pmins.w" ||
2709                          Name == "sse41.pminsd" ||
2710                          Name.startswith("avx2.pmins") ||
2711                          Name.startswith("avx512.mask.pmins"))) {
2712       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2713     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2714                          Name == "sse41.pminuw" ||
2715                          Name == "sse41.pminud" ||
2716                          Name.startswith("avx2.pminu") ||
2717                          Name.startswith("avx512.mask.pminu"))) {
2718       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2719     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2720                          Name == "avx2.pmulu.dq" ||
2721                          Name == "avx512.pmulu.dq.512" ||
2722                          Name.startswith("avx512.mask.pmulu.dq."))) {
2723       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2724     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2725                          Name == "avx2.pmul.dq" ||
2726                          Name == "avx512.pmul.dq.512" ||
2727                          Name.startswith("avx512.mask.pmul.dq."))) {
2728       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2729     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2730                          Name == "sse2.cvtsi2sd" ||
2731                          Name == "sse.cvtsi642ss" ||
2732                          Name == "sse2.cvtsi642sd")) {
2733       Rep = Builder.CreateSIToFP(
2734           CI->getArgOperand(1),
2735           cast<VectorType>(CI->getType())->getElementType());
2736       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2737     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2738       Rep = Builder.CreateUIToFP(
2739           CI->getArgOperand(1),
2740           cast<VectorType>(CI->getType())->getElementType());
2741       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2742     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2743       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2744       Rep = Builder.CreateFPExt(
2745           Rep, cast<VectorType>(CI->getType())->getElementType());
2746       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2747     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2748                          Name == "sse2.cvtdq2ps" ||
2749                          Name == "avx.cvtdq2.pd.256" ||
2750                          Name == "avx.cvtdq2.ps.256" ||
2751                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2752                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2753                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2754                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2755                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2756                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2757                          Name == "avx512.mask.cvtqq2ps.256" ||
2758                          Name == "avx512.mask.cvtqq2ps.512" ||
2759                          Name == "avx512.mask.cvtuqq2ps.256" ||
2760                          Name == "avx512.mask.cvtuqq2ps.512" ||
2761                          Name == "sse2.cvtps2pd" ||
2762                          Name == "avx.cvt.ps2.pd.256" ||
2763                          Name == "avx512.mask.cvtps2pd.128" ||
2764                          Name == "avx512.mask.cvtps2pd.256")) {
2765       auto *DstTy = cast<FixedVectorType>(CI->getType());
2766       Rep = CI->getArgOperand(0);
2767       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2768 
2769       unsigned NumDstElts = DstTy->getNumElements();
2770       if (NumDstElts < SrcTy->getNumElements()) {
2771         assert(NumDstElts == 2 && "Unexpected vector size");
2772         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2773       }
2774 
2775       bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2776       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2777       if (IsPS2PD)
2778         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2779       else if (CI->arg_size() == 4 &&
2780                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2781                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2782         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2783                                        : Intrinsic::x86_avx512_sitofp_round;
2784         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2785                                                 { DstTy, SrcTy });
2786         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2787       } else {
2788         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2789                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2790       }
2791 
2792       if (CI->arg_size() >= 3)
2793         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2794                             CI->getArgOperand(1));
2795     } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2796                          Name.startswith("vcvtph2ps."))) {
2797       auto *DstTy = cast<FixedVectorType>(CI->getType());
2798       Rep = CI->getArgOperand(0);
2799       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2800       unsigned NumDstElts = DstTy->getNumElements();
2801       if (NumDstElts != SrcTy->getNumElements()) {
2802         assert(NumDstElts == 4 && "Unexpected vector size");
2803         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2804       }
2805       Rep = Builder.CreateBitCast(
2806           Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2807       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2808       if (CI->arg_size() >= 3)
2809         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2810                             CI->getArgOperand(1));
2811     } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2812       // "avx512.mask.loadu." or "avx512.mask.load."
2813       bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2814       Rep =
2815           UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2816                             CI->getArgOperand(2), Aligned);
2817     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2818       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2819       Type *PtrTy = ResultTy->getElementType();
2820 
2821       // Cast the pointer to element type.
2822       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2823                                          llvm::PointerType::getUnqual(PtrTy));
2824 
2825       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2826                                      ResultTy->getNumElements());
2827 
2828       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2829                                                 Intrinsic::masked_expandload,
2830                                                 ResultTy);
2831       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2832     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2833       auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2834       Type *PtrTy = ResultTy->getElementType();
2835 
2836       // Cast the pointer to element type.
2837       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2838                                          llvm::PointerType::getUnqual(PtrTy));
2839 
2840       Value *MaskVec =
2841           getX86MaskVec(Builder, CI->getArgOperand(2),
2842                         cast<FixedVectorType>(ResultTy)->getNumElements());
2843 
2844       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2845                                                 Intrinsic::masked_compressstore,
2846                                                 ResultTy);
2847       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2848     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2849                          Name.startswith("avx512.mask.expand."))) {
2850       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2851 
2852       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2853                                      ResultTy->getNumElements());
2854 
2855       bool IsCompress = Name[12] == 'c';
2856       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2857                                      : Intrinsic::x86_avx512_mask_expand;
2858       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2859       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2860                                        MaskVec });
2861     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2862       bool IsSigned;
2863       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2864           Name.endswith("uq"))
2865         IsSigned = false;
2866       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2867                Name.endswith("q"))
2868         IsSigned = true;
2869       else
2870         llvm_unreachable("Unknown suffix");
2871 
2872       unsigned Imm;
2873       if (CI->arg_size() == 3) {
2874         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2875       } else {
2876         Name = Name.substr(9); // strip off "xop.vpcom"
2877         if (Name.startswith("lt"))
2878           Imm = 0;
2879         else if (Name.startswith("le"))
2880           Imm = 1;
2881         else if (Name.startswith("gt"))
2882           Imm = 2;
2883         else if (Name.startswith("ge"))
2884           Imm = 3;
2885         else if (Name.startswith("eq"))
2886           Imm = 4;
2887         else if (Name.startswith("ne"))
2888           Imm = 5;
2889         else if (Name.startswith("false"))
2890           Imm = 6;
2891         else if (Name.startswith("true"))
2892           Imm = 7;
2893         else
2894           llvm_unreachable("Unknown condition");
2895       }
2896 
2897       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2898     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2899       Value *Sel = CI->getArgOperand(2);
2900       Value *NotSel = Builder.CreateNot(Sel);
2901       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2902       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2903       Rep = Builder.CreateOr(Sel0, Sel1);
2904     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2905                          Name.startswith("avx512.prol") ||
2906                          Name.startswith("avx512.mask.prol"))) {
2907       Rep = upgradeX86Rotate(Builder, *CI, false);
2908     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2909                          Name.startswith("avx512.mask.pror"))) {
2910       Rep = upgradeX86Rotate(Builder, *CI, true);
2911     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2912                          Name.startswith("avx512.mask.vpshld") ||
2913                          Name.startswith("avx512.maskz.vpshld"))) {
2914       bool ZeroMask = Name[11] == 'z';
2915       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2916     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2917                          Name.startswith("avx512.mask.vpshrd") ||
2918                          Name.startswith("avx512.maskz.vpshrd"))) {
2919       bool ZeroMask = Name[11] == 'z';
2920       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2921     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2922       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2923                                                Intrinsic::x86_sse42_crc32_32_8);
2924       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2925       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2926       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2927     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2928                          Name.startswith("avx512.vbroadcast.s"))) {
2929       // Replace broadcasts with a series of insertelements.
2930       auto *VecTy = cast<FixedVectorType>(CI->getType());
2931       Type *EltTy = VecTy->getElementType();
2932       unsigned EltNum = VecTy->getNumElements();
2933       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2934                                           EltTy->getPointerTo());
2935       Value *Load = Builder.CreateLoad(EltTy, Cast);
2936       Type *I32Ty = Type::getInt32Ty(C);
2937       Rep = PoisonValue::get(VecTy);
2938       for (unsigned I = 0; I < EltNum; ++I)
2939         Rep = Builder.CreateInsertElement(Rep, Load,
2940                                           ConstantInt::get(I32Ty, I));
2941     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2942                          Name.startswith("sse41.pmovzx") ||
2943                          Name.startswith("avx2.pmovsx") ||
2944                          Name.startswith("avx2.pmovzx") ||
2945                          Name.startswith("avx512.mask.pmovsx") ||
2946                          Name.startswith("avx512.mask.pmovzx"))) {
2947       auto *DstTy = cast<FixedVectorType>(CI->getType());
2948       unsigned NumDstElts = DstTy->getNumElements();
2949 
2950       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2951       SmallVector<int, 8> ShuffleMask(NumDstElts);
2952       for (unsigned i = 0; i != NumDstElts; ++i)
2953         ShuffleMask[i] = i;
2954 
2955       Value *SV =
2956           Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2957 
2958       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2959       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2960                    : Builder.CreateZExt(SV, DstTy);
2961       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2962       if (CI->arg_size() == 3)
2963         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2964                             CI->getArgOperand(1));
2965     } else if (Name == "avx512.mask.pmov.qd.256" ||
2966                Name == "avx512.mask.pmov.qd.512" ||
2967                Name == "avx512.mask.pmov.wb.256" ||
2968                Name == "avx512.mask.pmov.wb.512") {
2969       Type *Ty = CI->getArgOperand(1)->getType();
2970       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2971       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2972                           CI->getArgOperand(1));
2973     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2974                          Name == "avx2.vbroadcasti128")) {
2975       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2976       Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2977       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2978       auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2979       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2980                                             PointerType::getUnqual(VT));
2981       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2982       if (NumSrcElts == 2)
2983         Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2984       else
2985         Rep = Builder.CreateShuffleVector(
2986             Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2987     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2988                          Name.startswith("avx512.mask.shuf.f"))) {
2989       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2990       Type *VT = CI->getType();
2991       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2992       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2993       unsigned ControlBitsMask = NumLanes - 1;
2994       unsigned NumControlBits = NumLanes / 2;
2995       SmallVector<int, 8> ShuffleMask(0);
2996 
2997       for (unsigned l = 0; l != NumLanes; ++l) {
2998         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2999         // We actually need the other source.
3000         if (l >= NumLanes / 2)
3001           LaneMask += NumLanes;
3002         for (unsigned i = 0; i != NumElementsInLane; ++i)
3003           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3004       }
3005       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3006                                         CI->getArgOperand(1), ShuffleMask);
3007       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3008                           CI->getArgOperand(3));
3009     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
3010                          Name.startswith("avx512.mask.broadcasti"))) {
3011       unsigned NumSrcElts =
3012           cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3013               ->getNumElements();
3014       unsigned NumDstElts =
3015           cast<FixedVectorType>(CI->getType())->getNumElements();
3016 
3017       SmallVector<int, 8> ShuffleMask(NumDstElts);
3018       for (unsigned i = 0; i != NumDstElts; ++i)
3019         ShuffleMask[i] = i % NumSrcElts;
3020 
3021       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3022                                         CI->getArgOperand(0),
3023                                         ShuffleMask);
3024       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3025                           CI->getArgOperand(1));
3026     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
3027                          Name.startswith("avx2.vbroadcast") ||
3028                          Name.startswith("avx512.pbroadcast") ||
3029                          Name.startswith("avx512.mask.broadcast.s"))) {
3030       // Replace vp?broadcasts with a vector shuffle.
3031       Value *Op = CI->getArgOperand(0);
3032       ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3033       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3034       SmallVector<int, 8> M;
3035       ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3036       Rep = Builder.CreateShuffleVector(Op, M);
3037 
3038       if (CI->arg_size() == 3)
3039         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3040                             CI->getArgOperand(1));
3041     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
3042                          Name.startswith("avx2.padds.") ||
3043                          Name.startswith("avx512.padds.") ||
3044                          Name.startswith("avx512.mask.padds."))) {
3045       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3046     } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
3047                          Name.startswith("avx2.psubs.") ||
3048                          Name.startswith("avx512.psubs.") ||
3049                          Name.startswith("avx512.mask.psubs."))) {
3050       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3051     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
3052                          Name.startswith("avx2.paddus.") ||
3053                          Name.startswith("avx512.mask.paddus."))) {
3054       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3055     } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
3056                          Name.startswith("avx2.psubus.") ||
3057                          Name.startswith("avx512.mask.psubus."))) {
3058       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3059     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
3060       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3061                                       CI->getArgOperand(1),
3062                                       CI->getArgOperand(2),
3063                                       CI->getArgOperand(3),
3064                                       CI->getArgOperand(4),
3065                                       false);
3066     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
3067       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3068                                       CI->getArgOperand(1),
3069                                       CI->getArgOperand(2),
3070                                       CI->getArgOperand(3),
3071                                       CI->getArgOperand(4),
3072                                       true);
3073     } else if (IsX86 && (Name == "sse2.psll.dq" ||
3074                          Name == "avx2.psll.dq")) {
3075       // 128/256-bit shift left specified in bits.
3076       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3077       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3078                                        Shift / 8); // Shift is in bits.
3079     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3080                          Name == "avx2.psrl.dq")) {
3081       // 128/256-bit shift right specified in bits.
3082       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3083       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3084                                        Shift / 8); // Shift is in bits.
3085     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3086                          Name == "avx2.psll.dq.bs" ||
3087                          Name == "avx512.psll.dq.512")) {
3088       // 128/256/512-bit shift left specified in bytes.
3089       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3090       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3091     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3092                          Name == "avx2.psrl.dq.bs" ||
3093                          Name == "avx512.psrl.dq.512")) {
3094       // 128/256/512-bit shift right specified in bytes.
3095       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3096       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3097     } else if (IsX86 && (Name == "sse41.pblendw" ||
3098                          Name.startswith("sse41.blendp") ||
3099                          Name.startswith("avx.blend.p") ||
3100                          Name == "avx2.pblendw" ||
3101                          Name.startswith("avx2.pblendd."))) {
3102       Value *Op0 = CI->getArgOperand(0);
3103       Value *Op1 = CI->getArgOperand(1);
3104       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3105       auto *VecTy = cast<FixedVectorType>(CI->getType());
3106       unsigned NumElts = VecTy->getNumElements();
3107 
3108       SmallVector<int, 16> Idxs(NumElts);
3109       for (unsigned i = 0; i != NumElts; ++i)
3110         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
3111 
3112       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3113     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
3114                          Name == "avx2.vinserti128" ||
3115                          Name.startswith("avx512.mask.insert"))) {
3116       Value *Op0 = CI->getArgOperand(0);
3117       Value *Op1 = CI->getArgOperand(1);
3118       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3119       unsigned DstNumElts =
3120           cast<FixedVectorType>(CI->getType())->getNumElements();
3121       unsigned SrcNumElts =
3122           cast<FixedVectorType>(Op1->getType())->getNumElements();
3123       unsigned Scale = DstNumElts / SrcNumElts;
3124 
3125       // Mask off the high bits of the immediate value; hardware ignores those.
3126       Imm = Imm % Scale;
3127 
3128       // Extend the second operand into a vector the size of the destination.
3129       SmallVector<int, 8> Idxs(DstNumElts);
3130       for (unsigned i = 0; i != SrcNumElts; ++i)
3131         Idxs[i] = i;
3132       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3133         Idxs[i] = SrcNumElts;
3134       Rep = Builder.CreateShuffleVector(Op1, Idxs);
3135 
3136       // Insert the second operand into the first operand.
3137 
3138       // Note that there is no guarantee that instruction lowering will actually
3139       // produce a vinsertf128 instruction for the created shuffles. In
3140       // particular, the 0 immediate case involves no lane changes, so it can
3141       // be handled as a blend.
3142 
3143       // Example of shuffle mask for 32-bit elements:
3144       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
3145       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
3146 
3147       // First fill with identity mask.
3148       for (unsigned i = 0; i != DstNumElts; ++i)
3149         Idxs[i] = i;
3150       // Then replace the elements where we need to insert.
3151       for (unsigned i = 0; i != SrcNumElts; ++i)
3152         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3153       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3154 
3155       // If the intrinsic has a mask operand, handle that.
3156       if (CI->arg_size() == 5)
3157         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3158                             CI->getArgOperand(3));
3159     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
3160                          Name == "avx2.vextracti128" ||
3161                          Name.startswith("avx512.mask.vextract"))) {
3162       Value *Op0 = CI->getArgOperand(0);
3163       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3164       unsigned DstNumElts =
3165           cast<FixedVectorType>(CI->getType())->getNumElements();
3166       unsigned SrcNumElts =
3167           cast<FixedVectorType>(Op0->getType())->getNumElements();
3168       unsigned Scale = SrcNumElts / DstNumElts;
3169 
3170       // Mask off the high bits of the immediate value; hardware ignores those.
3171       Imm = Imm % Scale;
3172 
3173       // Get indexes for the subvector of the input vector.
3174       SmallVector<int, 8> Idxs(DstNumElts);
3175       for (unsigned i = 0; i != DstNumElts; ++i) {
3176         Idxs[i] = i + (Imm * DstNumElts);
3177       }
3178       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3179 
3180       // If the intrinsic has a mask operand, handle that.
3181       if (CI->arg_size() == 4)
3182         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3183                             CI->getArgOperand(2));
3184     } else if (!IsX86 && Name == "stackprotectorcheck") {
3185       Rep = nullptr;
3186     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
3187                          Name.startswith("avx512.mask.perm.di."))) {
3188       Value *Op0 = CI->getArgOperand(0);
3189       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3190       auto *VecTy = cast<FixedVectorType>(CI->getType());
3191       unsigned NumElts = VecTy->getNumElements();
3192 
3193       SmallVector<int, 8> Idxs(NumElts);
3194       for (unsigned i = 0; i != NumElts; ++i)
3195         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3196 
3197       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3198 
3199       if (CI->arg_size() == 4)
3200         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3201                             CI->getArgOperand(2));
3202     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
3203                          Name == "avx2.vperm2i128")) {
3204       // The immediate permute control byte looks like this:
3205       //    [1:0] - select 128 bits from sources for low half of destination
3206       //    [2]   - ignore
3207       //    [3]   - zero low half of destination
3208       //    [5:4] - select 128 bits from sources for high half of destination
3209       //    [6]   - ignore
3210       //    [7]   - zero high half of destination
3211 
3212       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3213 
3214       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3215       unsigned HalfSize = NumElts / 2;
3216       SmallVector<int, 8> ShuffleMask(NumElts);
3217 
3218       // Determine which operand(s) are actually in use for this instruction.
3219       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3220       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3221 
3222       // If needed, replace operands based on zero mask.
3223       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3224       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3225 
3226       // Permute low half of result.
3227       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3228       for (unsigned i = 0; i < HalfSize; ++i)
3229         ShuffleMask[i] = StartIndex + i;
3230 
3231       // Permute high half of result.
3232       StartIndex = (Imm & 0x10) ? HalfSize : 0;
3233       for (unsigned i = 0; i < HalfSize; ++i)
3234         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3235 
3236       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3237 
3238     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
3239                          Name == "sse2.pshuf.d" ||
3240                          Name.startswith("avx512.mask.vpermil.p") ||
3241                          Name.startswith("avx512.mask.pshuf.d."))) {
3242       Value *Op0 = CI->getArgOperand(0);
3243       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3244       auto *VecTy = cast<FixedVectorType>(CI->getType());
3245       unsigned NumElts = VecTy->getNumElements();
3246       // Calculate the size of each index in the immediate.
3247       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3248       unsigned IdxMask = ((1 << IdxSize) - 1);
3249 
3250       SmallVector<int, 8> Idxs(NumElts);
3251       // Look up the bits for this element, wrapping around the immediate every
3252       // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3253       // to offset by the first index of each group.
3254       for (unsigned i = 0; i != NumElts; ++i)
3255         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3256 
3257       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3258 
3259       if (CI->arg_size() == 4)
3260         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3261                             CI->getArgOperand(2));
3262     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
3263                          Name.startswith("avx512.mask.pshufl.w."))) {
3264       Value *Op0 = CI->getArgOperand(0);
3265       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3266       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3267 
3268       SmallVector<int, 16> Idxs(NumElts);
3269       for (unsigned l = 0; l != NumElts; l += 8) {
3270         for (unsigned i = 0; i != 4; ++i)
3271           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3272         for (unsigned i = 4; i != 8; ++i)
3273           Idxs[i + l] = i + l;
3274       }
3275 
3276       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3277 
3278       if (CI->arg_size() == 4)
3279         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3280                             CI->getArgOperand(2));
3281     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3282                          Name.startswith("avx512.mask.pshufh.w."))) {
3283       Value *Op0 = CI->getArgOperand(0);
3284       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3285       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3286 
3287       SmallVector<int, 16> Idxs(NumElts);
3288       for (unsigned l = 0; l != NumElts; l += 8) {
3289         for (unsigned i = 0; i != 4; ++i)
3290           Idxs[i + l] = i + l;
3291         for (unsigned i = 0; i != 4; ++i)
3292           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3293       }
3294 
3295       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3296 
3297       if (CI->arg_size() == 4)
3298         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3299                             CI->getArgOperand(2));
3300     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
3301       Value *Op0 = CI->getArgOperand(0);
3302       Value *Op1 = CI->getArgOperand(1);
3303       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3304       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3305 
3306       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3307       unsigned HalfLaneElts = NumLaneElts / 2;
3308 
3309       SmallVector<int, 16> Idxs(NumElts);
3310       for (unsigned i = 0; i != NumElts; ++i) {
3311         // Base index is the starting element of the lane.
3312         Idxs[i] = i - (i % NumLaneElts);
3313         // If we are halfway through the lane, switch to the other source.
3314         if ((i % NumLaneElts) >= HalfLaneElts)
3315           Idxs[i] += NumElts;
3316         // Now select the specific element by adding HalfLaneElts bits from
3317         // the immediate, wrapping around the immediate every 8 bits.
3318         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3319       }
3320 
3321       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3322 
3323       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3324                           CI->getArgOperand(3));
3325     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
3326                          Name.startswith("avx512.mask.movshdup") ||
3327                          Name.startswith("avx512.mask.movsldup"))) {
3328       Value *Op0 = CI->getArgOperand(0);
3329       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3330       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3331 
3332       unsigned Offset = 0;
3333       if (Name.startswith("avx512.mask.movshdup."))
3334         Offset = 1;
3335 
3336       SmallVector<int, 16> Idxs(NumElts);
3337       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3338         for (unsigned i = 0; i != NumLaneElts; i += 2) {
3339           Idxs[i + l + 0] = i + l + Offset;
3340           Idxs[i + l + 1] = i + l + Offset;
3341         }
3342 
3343       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3344 
3345       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3346                           CI->getArgOperand(1));
3347     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
3348                          Name.startswith("avx512.mask.unpckl."))) {
3349       Value *Op0 = CI->getArgOperand(0);
3350       Value *Op1 = CI->getArgOperand(1);
3351       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3352       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3353 
3354       SmallVector<int, 64> Idxs(NumElts);
3355       for (int l = 0; l != NumElts; l += NumLaneElts)
3356         for (int i = 0; i != NumLaneElts; ++i)
3357           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3358 
3359       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3360 
3361       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3362                           CI->getArgOperand(2));
3363     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
3364                          Name.startswith("avx512.mask.unpckh."))) {
3365       Value *Op0 = CI->getArgOperand(0);
3366       Value *Op1 = CI->getArgOperand(1);
3367       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3368       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3369 
3370       SmallVector<int, 64> Idxs(NumElts);
3371       for (int l = 0; l != NumElts; l += NumLaneElts)
3372         for (int i = 0; i != NumLaneElts; ++i)
3373           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3374 
3375       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3376 
3377       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3378                           CI->getArgOperand(2));
3379     } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
3380                          Name.startswith("avx512.mask.pand."))) {
3381       VectorType *FTy = cast<VectorType>(CI->getType());
3382       VectorType *ITy = VectorType::getInteger(FTy);
3383       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3384                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3385       Rep = Builder.CreateBitCast(Rep, FTy);
3386       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3387                           CI->getArgOperand(2));
3388     } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
3389                          Name.startswith("avx512.mask.pandn."))) {
3390       VectorType *FTy = cast<VectorType>(CI->getType());
3391       VectorType *ITy = VectorType::getInteger(FTy);
3392       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3393       Rep = Builder.CreateAnd(Rep,
3394                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3395       Rep = Builder.CreateBitCast(Rep, FTy);
3396       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3397                           CI->getArgOperand(2));
3398     } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
3399                          Name.startswith("avx512.mask.por."))) {
3400       VectorType *FTy = cast<VectorType>(CI->getType());
3401       VectorType *ITy = VectorType::getInteger(FTy);
3402       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3403                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3404       Rep = Builder.CreateBitCast(Rep, FTy);
3405       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3406                           CI->getArgOperand(2));
3407     } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
3408                          Name.startswith("avx512.mask.pxor."))) {
3409       VectorType *FTy = cast<VectorType>(CI->getType());
3410       VectorType *ITy = VectorType::getInteger(FTy);
3411       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3412                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3413       Rep = Builder.CreateBitCast(Rep, FTy);
3414       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3415                           CI->getArgOperand(2));
3416     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
3417       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3418       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3419                           CI->getArgOperand(2));
3420     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
3421       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3422       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3423                           CI->getArgOperand(2));
3424     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
3425       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3426       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3427                           CI->getArgOperand(2));
3428     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
3429       if (Name.endswith(".512")) {
3430         Intrinsic::ID IID;
3431         if (Name[17] == 's')
3432           IID = Intrinsic::x86_avx512_add_ps_512;
3433         else
3434           IID = Intrinsic::x86_avx512_add_pd_512;
3435 
3436         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3437                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3438                                    CI->getArgOperand(4) });
3439       } else {
3440         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3441       }
3442       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3443                           CI->getArgOperand(2));
3444     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
3445       if (Name.endswith(".512")) {
3446         Intrinsic::ID IID;
3447         if (Name[17] == 's')
3448           IID = Intrinsic::x86_avx512_div_ps_512;
3449         else
3450           IID = Intrinsic::x86_avx512_div_pd_512;
3451 
3452         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3453                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3454                                    CI->getArgOperand(4) });
3455       } else {
3456         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3457       }
3458       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3459                           CI->getArgOperand(2));
3460     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
3461       if (Name.endswith(".512")) {
3462         Intrinsic::ID IID;
3463         if (Name[17] == 's')
3464           IID = Intrinsic::x86_avx512_mul_ps_512;
3465         else
3466           IID = Intrinsic::x86_avx512_mul_pd_512;
3467 
3468         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3469                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3470                                    CI->getArgOperand(4) });
3471       } else {
3472         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3473       }
3474       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3475                           CI->getArgOperand(2));
3476     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
3477       if (Name.endswith(".512")) {
3478         Intrinsic::ID IID;
3479         if (Name[17] == 's')
3480           IID = Intrinsic::x86_avx512_sub_ps_512;
3481         else
3482           IID = Intrinsic::x86_avx512_sub_pd_512;
3483 
3484         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3485                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3486                                    CI->getArgOperand(4) });
3487       } else {
3488         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3489       }
3490       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3491                           CI->getArgOperand(2));
3492     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
3493                          Name.startswith("avx512.mask.min.p")) &&
3494                Name.drop_front(18) == ".512") {
3495       bool IsDouble = Name[17] == 'd';
3496       bool IsMin = Name[13] == 'i';
3497       static const Intrinsic::ID MinMaxTbl[2][2] = {
3498         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3499         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3500       };
3501       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3502 
3503       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3504                                { CI->getArgOperand(0), CI->getArgOperand(1),
3505                                  CI->getArgOperand(4) });
3506       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3507                           CI->getArgOperand(2));
3508     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
3509       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3510                                                          Intrinsic::ctlz,
3511                                                          CI->getType()),
3512                                { CI->getArgOperand(0), Builder.getInt1(false) });
3513       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3514                           CI->getArgOperand(1));
3515     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
3516       bool IsImmediate = Name[16] == 'i' ||
3517                          (Name.size() > 18 && Name[18] == 'i');
3518       bool IsVariable = Name[16] == 'v';
3519       char Size = Name[16] == '.' ? Name[17] :
3520                   Name[17] == '.' ? Name[18] :
3521                   Name[18] == '.' ? Name[19] :
3522                                     Name[20];
3523 
3524       Intrinsic::ID IID;
3525       if (IsVariable && Name[17] != '.') {
3526         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3527           IID = Intrinsic::x86_avx2_psllv_q;
3528         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3529           IID = Intrinsic::x86_avx2_psllv_q_256;
3530         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3531           IID = Intrinsic::x86_avx2_psllv_d;
3532         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3533           IID = Intrinsic::x86_avx2_psllv_d_256;
3534         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3535           IID = Intrinsic::x86_avx512_psllv_w_128;
3536         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3537           IID = Intrinsic::x86_avx512_psllv_w_256;
3538         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3539           IID = Intrinsic::x86_avx512_psllv_w_512;
3540         else
3541           llvm_unreachable("Unexpected size");
3542       } else if (Name.endswith(".128")) {
3543         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3544           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3545                             : Intrinsic::x86_sse2_psll_d;
3546         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3547           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3548                             : Intrinsic::x86_sse2_psll_q;
3549         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3550           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3551                             : Intrinsic::x86_sse2_psll_w;
3552         else
3553           llvm_unreachable("Unexpected size");
3554       } else if (Name.endswith(".256")) {
3555         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3556           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3557                             : Intrinsic::x86_avx2_psll_d;
3558         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3559           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3560                             : Intrinsic::x86_avx2_psll_q;
3561         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3562           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3563                             : Intrinsic::x86_avx2_psll_w;
3564         else
3565           llvm_unreachable("Unexpected size");
3566       } else {
3567         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3568           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3569                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
3570                               Intrinsic::x86_avx512_psll_d_512;
3571         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3572           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3573                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
3574                               Intrinsic::x86_avx512_psll_q_512;
3575         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3576           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3577                             : Intrinsic::x86_avx512_psll_w_512;
3578         else
3579           llvm_unreachable("Unexpected size");
3580       }
3581 
3582       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3583     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3584       bool IsImmediate = Name[16] == 'i' ||
3585                          (Name.size() > 18 && Name[18] == 'i');
3586       bool IsVariable = Name[16] == 'v';
3587       char Size = Name[16] == '.' ? Name[17] :
3588                   Name[17] == '.' ? Name[18] :
3589                   Name[18] == '.' ? Name[19] :
3590                                     Name[20];
3591 
3592       Intrinsic::ID IID;
3593       if (IsVariable && Name[17] != '.') {
3594         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3595           IID = Intrinsic::x86_avx2_psrlv_q;
3596         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3597           IID = Intrinsic::x86_avx2_psrlv_q_256;
3598         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3599           IID = Intrinsic::x86_avx2_psrlv_d;
3600         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3601           IID = Intrinsic::x86_avx2_psrlv_d_256;
3602         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3603           IID = Intrinsic::x86_avx512_psrlv_w_128;
3604         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3605           IID = Intrinsic::x86_avx512_psrlv_w_256;
3606         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3607           IID = Intrinsic::x86_avx512_psrlv_w_512;
3608         else
3609           llvm_unreachable("Unexpected size");
3610       } else if (Name.endswith(".128")) {
3611         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3612           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3613                             : Intrinsic::x86_sse2_psrl_d;
3614         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3615           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3616                             : Intrinsic::x86_sse2_psrl_q;
3617         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3618           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3619                             : Intrinsic::x86_sse2_psrl_w;
3620         else
3621           llvm_unreachable("Unexpected size");
3622       } else if (Name.endswith(".256")) {
3623         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3624           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3625                             : Intrinsic::x86_avx2_psrl_d;
3626         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3627           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3628                             : Intrinsic::x86_avx2_psrl_q;
3629         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3630           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3631                             : Intrinsic::x86_avx2_psrl_w;
3632         else
3633           llvm_unreachable("Unexpected size");
3634       } else {
3635         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3636           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3637                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
3638                               Intrinsic::x86_avx512_psrl_d_512;
3639         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3640           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3641                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
3642                               Intrinsic::x86_avx512_psrl_q_512;
3643         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3644           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3645                             : Intrinsic::x86_avx512_psrl_w_512;
3646         else
3647           llvm_unreachable("Unexpected size");
3648       }
3649 
3650       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3651     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3652       bool IsImmediate = Name[16] == 'i' ||
3653                          (Name.size() > 18 && Name[18] == 'i');
3654       bool IsVariable = Name[16] == 'v';
3655       char Size = Name[16] == '.' ? Name[17] :
3656                   Name[17] == '.' ? Name[18] :
3657                   Name[18] == '.' ? Name[19] :
3658                                     Name[20];
3659 
3660       Intrinsic::ID IID;
3661       if (IsVariable && Name[17] != '.') {
3662         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3663           IID = Intrinsic::x86_avx2_psrav_d;
3664         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3665           IID = Intrinsic::x86_avx2_psrav_d_256;
3666         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3667           IID = Intrinsic::x86_avx512_psrav_w_128;
3668         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3669           IID = Intrinsic::x86_avx512_psrav_w_256;
3670         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3671           IID = Intrinsic::x86_avx512_psrav_w_512;
3672         else
3673           llvm_unreachable("Unexpected size");
3674       } else if (Name.endswith(".128")) {
3675         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3676           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3677                             : Intrinsic::x86_sse2_psra_d;
3678         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3679           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3680                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3681                               Intrinsic::x86_avx512_psra_q_128;
3682         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3683           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3684                             : Intrinsic::x86_sse2_psra_w;
3685         else
3686           llvm_unreachable("Unexpected size");
3687       } else if (Name.endswith(".256")) {
3688         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3689           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3690                             : Intrinsic::x86_avx2_psra_d;
3691         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3692           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3693                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3694                               Intrinsic::x86_avx512_psra_q_256;
3695         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3696           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3697                             : Intrinsic::x86_avx2_psra_w;
3698         else
3699           llvm_unreachable("Unexpected size");
3700       } else {
3701         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3702           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3703                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3704                               Intrinsic::x86_avx512_psra_d_512;
3705         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3706           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3707                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3708                               Intrinsic::x86_avx512_psra_q_512;
3709         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3710           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3711                             : Intrinsic::x86_avx512_psra_w_512;
3712         else
3713           llvm_unreachable("Unexpected size");
3714       }
3715 
3716       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3717     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3718       Rep = upgradeMaskedMove(Builder, *CI);
3719     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3720       Rep = UpgradeMaskToInt(Builder, *CI);
3721     } else if (IsX86 && Name.endswith(".movntdqa")) {
3722       MDNode *Node = MDNode::get(
3723           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3724 
3725       Value *Ptr = CI->getArgOperand(0);
3726 
3727       // Cast the pointer to a pointer to the type being loaded.
3728       Value *BC = Builder.CreateBitCast(
3729           Ptr, PointerType::getUnqual(CI->getType()), "cast");
3730       LoadInst *LI = Builder.CreateAlignedLoad(
3731           CI->getType(), BC,
3732           Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3733       LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3734       Rep = LI;
3735     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3736                          Name.startswith("fma.vfmsub.") ||
3737                          Name.startswith("fma.vfnmadd.") ||
3738                          Name.startswith("fma.vfnmsub."))) {
3739       bool NegMul = Name[6] == 'n';
3740       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3741       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3742 
3743       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3744                        CI->getArgOperand(2) };
3745 
3746       if (IsScalar) {
3747         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3748         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3749         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3750       }
3751 
3752       if (NegMul && !IsScalar)
3753         Ops[0] = Builder.CreateFNeg(Ops[0]);
3754       if (NegMul && IsScalar)
3755         Ops[1] = Builder.CreateFNeg(Ops[1]);
3756       if (NegAcc)
3757         Ops[2] = Builder.CreateFNeg(Ops[2]);
3758 
3759       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3760                                                          Intrinsic::fma,
3761                                                          Ops[0]->getType()),
3762                                Ops);
3763 
3764       if (IsScalar)
3765         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3766                                           (uint64_t)0);
3767     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3768       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3769                        CI->getArgOperand(2) };
3770 
3771       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3772       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3773       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3774 
3775       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3776                                                          Intrinsic::fma,
3777                                                          Ops[0]->getType()),
3778                                Ops);
3779 
3780       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3781                                         Rep, (uint64_t)0);
3782     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3783                          Name.startswith("avx512.maskz.vfmadd.s") ||
3784                          Name.startswith("avx512.mask3.vfmadd.s") ||
3785                          Name.startswith("avx512.mask3.vfmsub.s") ||
3786                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
3787       bool IsMask3 = Name[11] == '3';
3788       bool IsMaskZ = Name[11] == 'z';
3789       // Drop the "avx512.mask."/"mask3."/"maskz." prefix to make it easier.
3790       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3791       bool NegMul = Name[2] == 'n';
3792       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3793 
3794       Value *A = CI->getArgOperand(0);
3795       Value *B = CI->getArgOperand(1);
3796       Value *C = CI->getArgOperand(2);
3797 
3798       if (NegMul && (IsMask3 || IsMaskZ))
3799         A = Builder.CreateFNeg(A);
3800       if (NegMul && !(IsMask3 || IsMaskZ))
3801         B = Builder.CreateFNeg(B);
3802       if (NegAcc)
3803         C = Builder.CreateFNeg(C);
3804 
3805       A = Builder.CreateExtractElement(A, (uint64_t)0);
3806       B = Builder.CreateExtractElement(B, (uint64_t)0);
3807       C = Builder.CreateExtractElement(C, (uint64_t)0);
3808 
3809       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3810           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3811         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3812 
3813         Intrinsic::ID IID;
3814         if (Name.back() == 'd')
3815           IID = Intrinsic::x86_avx512_vfmadd_f64;
3816         else
3817           IID = Intrinsic::x86_avx512_vfmadd_f32;
3818         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3819         Rep = Builder.CreateCall(FMA, Ops);
3820       } else {
3821         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3822                                                   Intrinsic::fma,
3823                                                   A->getType());
3824         Rep = Builder.CreateCall(FMA, { A, B, C });
3825       }
3826 
3827       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3828                         IsMask3 ? C : A;
3829 
3830       // For Mask3 with NegAcc, we need to create a new extractelement that
3831       // avoids the negation above.
3832       if (NegAcc && IsMask3)
3833         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3834                                                 (uint64_t)0);
3835 
3836       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3837                                 Rep, PassThru);
3838       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3839                                         Rep, (uint64_t)0);
3840     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3841                          Name.startswith("avx512.mask.vfnmadd.p") ||
3842                          Name.startswith("avx512.mask.vfnmsub.p") ||
3843                          Name.startswith("avx512.mask3.vfmadd.p") ||
3844                          Name.startswith("avx512.mask3.vfmsub.p") ||
3845                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3846                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3847       bool IsMask3 = Name[11] == '3';
3848       bool IsMaskZ = Name[11] == 'z';
3849       // Drop the "avx512.mask." to make it easier.
3850       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3851       bool NegMul = Name[2] == 'n';
3852       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3853 
3854       Value *A = CI->getArgOperand(0);
3855       Value *B = CI->getArgOperand(1);
3856       Value *C = CI->getArgOperand(2);
3857 
3858       if (NegMul && (IsMask3 || IsMaskZ))
3859         A = Builder.CreateFNeg(A);
3860       if (NegMul && !(IsMask3 || IsMaskZ))
3861         B = Builder.CreateFNeg(B);
3862       if (NegAcc)
3863         C = Builder.CreateFNeg(C);
3864 
3865       if (CI->arg_size() == 5 &&
3866           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3867            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3868         Intrinsic::ID IID;
3869         // Check the character before ".512" in string.
3870         if (Name[Name.size()-5] == 's')
3871           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3872         else
3873           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3874 
3875         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3876                                  { A, B, C, CI->getArgOperand(4) });
3877       } else {
3878         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3879                                                   Intrinsic::fma,
3880                                                   A->getType());
3881         Rep = Builder.CreateCall(FMA, { A, B, C });
3882       }
3883 
3884       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3885                         IsMask3 ? CI->getArgOperand(2) :
3886                                   CI->getArgOperand(0);
3887 
3888       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3889     } else if (IsX86 &&  Name.startswith("fma.vfmsubadd.p")) {
3890       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3891       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3892       Intrinsic::ID IID;
3893       if (VecWidth == 128 && EltWidth == 32)
3894         IID = Intrinsic::x86_fma_vfmaddsub_ps;
3895       else if (VecWidth == 256 && EltWidth == 32)
3896         IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3897       else if (VecWidth == 128 && EltWidth == 64)
3898         IID = Intrinsic::x86_fma_vfmaddsub_pd;
3899       else if (VecWidth == 256 && EltWidth == 64)
3900         IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3901       else
3902         llvm_unreachable("Unexpected intrinsic");
3903 
3904       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3905                        CI->getArgOperand(2) };
3906       Ops[2] = Builder.CreateFNeg(Ops[2]);
3907       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3908                                Ops);
3909     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3910                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3911                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3912                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3913       bool IsMask3 = Name[11] == '3';
3914       bool IsMaskZ = Name[11] == 'z';
3915       // Drop the "avx512.mask." to make it easier.
3916       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3917       bool IsSubAdd = Name[3] == 's';
3918       if (CI->arg_size() == 5) {
3919         Intrinsic::ID IID;
3920         // Check the character before ".512" in string.
3921         if (Name[Name.size()-5] == 's')
3922           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3923         else
3924           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3925 
3926         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3927                          CI->getArgOperand(2), CI->getArgOperand(4) };
3928         if (IsSubAdd)
3929           Ops[2] = Builder.CreateFNeg(Ops[2]);
3930 
3931         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3932                                  Ops);
3933       } else {
3934         int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3935 
3936         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3937                          CI->getArgOperand(2) };
3938 
3939         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3940                                                   Ops[0]->getType());
3941         Value *Odd = Builder.CreateCall(FMA, Ops);
3942         Ops[2] = Builder.CreateFNeg(Ops[2]);
3943         Value *Even = Builder.CreateCall(FMA, Ops);
3944 
3945         if (IsSubAdd)
3946           std::swap(Even, Odd);
3947 
3948         SmallVector<int, 32> Idxs(NumElts);
3949         for (int i = 0; i != NumElts; ++i)
3950           Idxs[i] = i + (i % 2) * NumElts;
3951 
3952         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3953       }
3954 
3955       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3956                         IsMask3 ? CI->getArgOperand(2) :
3957                                   CI->getArgOperand(0);
3958 
3959       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3960     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3961                          Name.startswith("avx512.maskz.pternlog."))) {
3962       bool ZeroMask = Name[11] == 'z';
3963       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3964       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3965       Intrinsic::ID IID;
3966       if (VecWidth == 128 && EltWidth == 32)
3967         IID = Intrinsic::x86_avx512_pternlog_d_128;
3968       else if (VecWidth == 256 && EltWidth == 32)
3969         IID = Intrinsic::x86_avx512_pternlog_d_256;
3970       else if (VecWidth == 512 && EltWidth == 32)
3971         IID = Intrinsic::x86_avx512_pternlog_d_512;
3972       else if (VecWidth == 128 && EltWidth == 64)
3973         IID = Intrinsic::x86_avx512_pternlog_q_128;
3974       else if (VecWidth == 256 && EltWidth == 64)
3975         IID = Intrinsic::x86_avx512_pternlog_q_256;
3976       else if (VecWidth == 512 && EltWidth == 64)
3977         IID = Intrinsic::x86_avx512_pternlog_q_512;
3978       else
3979         llvm_unreachable("Unexpected intrinsic");
3980 
3981       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3982                         CI->getArgOperand(2), CI->getArgOperand(3) };
3983       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3984                                Args);
3985       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3986                                  : CI->getArgOperand(0);
3987       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3988     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3989                          Name.startswith("avx512.maskz.vpmadd52"))) {
3990       bool ZeroMask = Name[11] == 'z';
3991       bool High = Name[20] == 'h' || Name[21] == 'h';
3992       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3993       Intrinsic::ID IID;
3994       if (VecWidth == 128 && !High)
3995         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3996       else if (VecWidth == 256 && !High)
3997         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3998       else if (VecWidth == 512 && !High)
3999         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4000       else if (VecWidth == 128 && High)
4001         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4002       else if (VecWidth == 256 && High)
4003         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4004       else if (VecWidth == 512 && High)
4005         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4006       else
4007         llvm_unreachable("Unexpected intrinsic");
4008 
4009       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4010                         CI->getArgOperand(2) };
4011       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4012                                Args);
4013       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4014                                  : CI->getArgOperand(0);
4015       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4016     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
4017                          Name.startswith("avx512.mask.vpermt2var.") ||
4018                          Name.startswith("avx512.maskz.vpermt2var."))) {
4019       bool ZeroMask = Name[11] == 'z';
4020       bool IndexForm = Name[17] == 'i';
4021       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4022     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
4023                          Name.startswith("avx512.maskz.vpdpbusd.") ||
4024                          Name.startswith("avx512.mask.vpdpbusds.") ||
4025                          Name.startswith("avx512.maskz.vpdpbusds."))) {
4026       bool ZeroMask = Name[11] == 'z';
4027       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4028       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4029       Intrinsic::ID IID;
4030       if (VecWidth == 128 && !IsSaturating)
4031         IID = Intrinsic::x86_avx512_vpdpbusd_128;
4032       else if (VecWidth == 256 && !IsSaturating)
4033         IID = Intrinsic::x86_avx512_vpdpbusd_256;
4034       else if (VecWidth == 512 && !IsSaturating)
4035         IID = Intrinsic::x86_avx512_vpdpbusd_512;
4036       else if (VecWidth == 128 && IsSaturating)
4037         IID = Intrinsic::x86_avx512_vpdpbusds_128;
4038       else if (VecWidth == 256 && IsSaturating)
4039         IID = Intrinsic::x86_avx512_vpdpbusds_256;
4040       else if (VecWidth == 512 && IsSaturating)
4041         IID = Intrinsic::x86_avx512_vpdpbusds_512;
4042       else
4043         llvm_unreachable("Unexpected intrinsic");
4044 
4045       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4046                         CI->getArgOperand(2)  };
4047       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4048                                Args);
4049       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4050                                  : CI->getArgOperand(0);
4051       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4052     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
4053                          Name.startswith("avx512.maskz.vpdpwssd.") ||
4054                          Name.startswith("avx512.mask.vpdpwssds.") ||
4055                          Name.startswith("avx512.maskz.vpdpwssds."))) {
4056       bool ZeroMask = Name[11] == 'z';
4057       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4058       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4059       Intrinsic::ID IID;
4060       if (VecWidth == 128 && !IsSaturating)
4061         IID = Intrinsic::x86_avx512_vpdpwssd_128;
4062       else if (VecWidth == 256 && !IsSaturating)
4063         IID = Intrinsic::x86_avx512_vpdpwssd_256;
4064       else if (VecWidth == 512 && !IsSaturating)
4065         IID = Intrinsic::x86_avx512_vpdpwssd_512;
4066       else if (VecWidth == 128 && IsSaturating)
4067         IID = Intrinsic::x86_avx512_vpdpwssds_128;
4068       else if (VecWidth == 256 && IsSaturating)
4069         IID = Intrinsic::x86_avx512_vpdpwssds_256;
4070       else if (VecWidth == 512 && IsSaturating)
4071         IID = Intrinsic::x86_avx512_vpdpwssds_512;
4072       else
4073         llvm_unreachable("Unexpected intrinsic");
4074 
4075       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4076                         CI->getArgOperand(2)  };
4077       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4078                                Args);
4079       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4080                                  : CI->getArgOperand(0);
4081       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4082     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4083                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
4084                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
4085       Intrinsic::ID IID;
4086       if (Name[0] == 'a' && Name.back() == '2')
4087         IID = Intrinsic::x86_addcarry_32;
4088       else if (Name[0] == 'a' && Name.back() == '4')
4089         IID = Intrinsic::x86_addcarry_64;
4090       else if (Name[0] == 's' && Name.back() == '2')
4091         IID = Intrinsic::x86_subborrow_32;
4092       else if (Name[0] == 's' && Name.back() == '4')
4093         IID = Intrinsic::x86_subborrow_64;
4094       else
4095         llvm_unreachable("Unexpected intrinsic");
4096 
4097       // Make a call with 3 operands.
4098       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4099                         CI->getArgOperand(2)};
4100       Value *NewCall = Builder.CreateCall(
4101                                 Intrinsic::getDeclaration(CI->getModule(), IID),
4102                                 Args);
4103 
4104       // Extract the second result and store it.
4105       Value *Data = Builder.CreateExtractValue(NewCall, 1);
4106       // Cast the pointer to the right type.
4107       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
4108                                  llvm::PointerType::getUnqual(Data->getType()));
4109       Builder.CreateAlignedStore(Data, Ptr, Align(1));
4110       // Replace the original call result with the first result of the new call.
4111       Value *CF = Builder.CreateExtractValue(NewCall, 0);
4112 
4113       CI->replaceAllUsesWith(CF);
4114       Rep = nullptr;
4115     } else if (IsX86 && Name.startswith("avx512.mask.") &&
4116                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4117       // Rep will be updated by the call in the condition.
4118     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4119       Value *Arg = CI->getArgOperand(0);
4120       Value *Neg = Builder.CreateNeg(Arg, "neg");
4121       Value *Cmp = Builder.CreateICmpSGE(
4122           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4123       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4124     } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
4125                           Name.startswith("atomic.load.add.f64.p"))) {
4126       Value *Ptr = CI->getArgOperand(0);
4127       Value *Val = CI->getArgOperand(1);
4128       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4129                                     AtomicOrdering::SequentiallyConsistent);
4130     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
4131                           Name == "max.ui" || Name == "max.ull")) {
4132       Value *Arg0 = CI->getArgOperand(0);
4133       Value *Arg1 = CI->getArgOperand(1);
4134       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
4135                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4136                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4137       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4138     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
4139                           Name == "min.ui" || Name == "min.ull")) {
4140       Value *Arg0 = CI->getArgOperand(0);
4141       Value *Arg1 = CI->getArgOperand(1);
4142       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
4143                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4144                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4145       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4146     } else if (IsNVVM && Name == "clz.ll") {
4147       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4148       Value *Arg = CI->getArgOperand(0);
4149       Value *Ctlz = Builder.CreateCall(
4150           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4151                                     {Arg->getType()}),
4152           {Arg, Builder.getFalse()}, "ctlz");
4153       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4154     } else if (IsNVVM && Name == "popc.ll") {
4155       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64,
4156       // so truncate the result.
4157       Value *Arg = CI->getArgOperand(0);
4158       Value *Popc = Builder.CreateCall(
4159           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4160                                     {Arg->getType()}),
4161           Arg, "ctpop");
4162       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4163     } else if (IsNVVM) {
4164       if (Name == "h2f") {
4165         Rep =
4166             Builder.CreateCall(Intrinsic::getDeclaration(
4167                                    F->getParent(), Intrinsic::convert_from_fp16,
4168                                    {Builder.getFloatTy()}),
4169                                CI->getArgOperand(0), "h2f");
4170       } else {
4171         Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
4172         if (IID != Intrinsic::not_intrinsic &&
4173             !F->getReturnType()->getScalarType()->isBFloatTy()) {
4174           rename(F);
4175           NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4176           SmallVector<Value *, 2> Args;
4177           for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4178             Value *Arg = CI->getArgOperand(I);
4179             Type *OldType = Arg->getType();
4180             Type *NewType = NewFn->getArg(I)->getType();
4181             Args.push_back((OldType->isIntegerTy() &&
4182                             NewType->getScalarType()->isBFloatTy())
4183                                ? Builder.CreateBitCast(Arg, NewType)
4184                                : Arg);
4185           }
4186           Rep = Builder.CreateCall(NewFn, Args);
4187           if (F->getReturnType()->isIntegerTy())
4188             Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4189         }
4190       }
4191     } else if (IsARM) {
4192       Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
4193     } else if (IsAMDGCN) {
4194       Rep = UpgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4195     } else {
4196       llvm_unreachable("Unknown function for CallBase upgrade.");
4197     }
4198 
4199     if (Rep)
4200       CI->replaceAllUsesWith(Rep);
4201     CI->eraseFromParent();
4202     return;
4203   }
4204 
4205   const auto &DefaultCase = [&]() -> void {
         // Fallback upgrade used by the switch below whenever a call needs no
         // intrinsic-specific argument rewriting. It operates on the captured
         // CI (the old call), NewFn (the replacement declaration) and Builder,
         // and tries three strategies in order.
4206     if (CI->getFunctionType() == NewFn->getFunctionType()) {
4207       // Handle generic mangling change.
           // The call's function type already matches NewFn, so only the callee
           // is swapped; the assert checks that the name really differs.
4208       assert(
4209           (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4210           "Unknown function for CallBase upgrade and isn't just a name change");
4211       CI->setCalledFunction(NewFn);
4212       return;
4213     }
4214 
4215     // This must be an upgrade from a named to a literal struct.
         // Taken whenever the old call's result type is a StructType: the asserts
         // require NewFn to return a different struct type with the same number
         // of elements.
4216     if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4217       assert(OldST != NewFn->getReturnType() &&
4218              "Return type must have changed");
4219       assert(OldST->getNumElements() ==
4220                  cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4221              "Must have same number of elements");
4222 
           // Call NewFn with the original arguments, then rebuild a value of the
           // old struct type element by element, starting from poison, so that
           // existing users of CI keep seeing the type they expect.
4223       SmallVector<Value *> Args(CI->args());
4224       Value *NewCI = Builder.CreateCall(NewFn, Args);
4225       Value *Res = PoisonValue::get(OldST);
4226       for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4227         Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4228         Res = Builder.CreateInsertValue(Res, Elem, Idx);
4229       }
           // Redirect all uses to the rebuilt struct before deleting the old call.
4230       CI->replaceAllUsesWith(Res);
4231       CI->eraseFromParent();
4232       return;
4233     }
4234 
4235     // We're probably about to produce something invalid. Let the verifier catch
4236     // it instead of dying here.
         // The call is left in place; only its callee operand is replaced by
         // NewFn pointer-cast to the type of the old called operand.
4237     CI->setCalledOperand(
4238         ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4239     return;
4240   };
4241   CallInst *NewCall = nullptr;
4242   switch (NewFn->getIntrinsicID()) {
4243   default: {
4244     DefaultCase();
4245     return;
4246   }
4247   case Intrinsic::arm_neon_vst1:
4248   case Intrinsic::arm_neon_vst2:
4249   case Intrinsic::arm_neon_vst3:
4250   case Intrinsic::arm_neon_vst4:
4251   case Intrinsic::arm_neon_vst2lane:
4252   case Intrinsic::arm_neon_vst3lane:
4253   case Intrinsic::arm_neon_vst4lane: {
4254     SmallVector<Value *, 4> Args(CI->args());
4255     NewCall = Builder.CreateCall(NewFn, Args);
4256     break;
4257   }
4258   case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4259   case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4260   case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4261     LLVMContext &Ctx = F->getParent()->getContext();
4262     SmallVector<Value *, 4> Args(CI->args());
4263     Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4264                                cast<ConstantInt>(Args[3])->getZExtValue());
4265     NewCall = Builder.CreateCall(NewFn, Args);
4266     break;
4267   }
4268   case Intrinsic::aarch64_sve_ld3_sret:
4269   case Intrinsic::aarch64_sve_ld4_sret:
4270   case Intrinsic::aarch64_sve_ld2_sret: {
4271     StringRef Name = F->getName();
4272     Name = Name.substr(5);
4273     unsigned N = StringSwitch<unsigned>(Name)
4274                      .StartsWith("aarch64.sve.ld2", 2)
4275                      .StartsWith("aarch64.sve.ld3", 3)
4276                      .StartsWith("aarch64.sve.ld4", 4)
4277                      .Default(0);
4278     ScalableVectorType *RetTy =
4279         dyn_cast<ScalableVectorType>(F->getReturnType());
4280     unsigned MinElts = RetTy->getMinNumElements() / N;
4281     SmallVector<Value *, 2> Args(CI->args());
4282     Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4283     Value *Ret = llvm::PoisonValue::get(RetTy);
4284     for (unsigned I = 0; I < N; I++) {
4285       Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4286       Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4287       Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4288     }
4289     NewCall = dyn_cast<CallInst>(Ret);
4290     break;
4291   }
4292 
4293   case Intrinsic::vector_extract: {
4294     StringRef Name = F->getName();
4295     Name = Name.substr(5); // Strip llvm
4296     if (!Name.startswith("aarch64.sve.tuple.get")) {
4297       DefaultCase();
4298       return;
4299     }
4300     ScalableVectorType *RetTy =
4301         dyn_cast<ScalableVectorType>(F->getReturnType());
4302     unsigned MinElts = RetTy->getMinNumElements();
4303     unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4304     Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4305     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4306     break;
4307   }
4308 
4309   case Intrinsic::vector_insert: {
4310     StringRef Name = F->getName();
4311     Name = Name.substr(5);
4312     if (!Name.startswith("aarch64.sve.tuple")) {
4313       DefaultCase();
4314       return;
4315     }
4316     if (Name.startswith("aarch64.sve.tuple.set")) {
4317       unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4318       ScalableVectorType *Ty =
4319           dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4320       Value *NewIdx =
4321           ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4322       NewCall = Builder.CreateCall(
4323           NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4324       break;
4325     }
4326     if (Name.startswith("aarch64.sve.tuple.create")) {
4327       unsigned N = StringSwitch<unsigned>(Name)
4328                        .StartsWith("aarch64.sve.tuple.create2", 2)
4329                        .StartsWith("aarch64.sve.tuple.create3", 3)
4330                        .StartsWith("aarch64.sve.tuple.create4", 4)
4331                        .Default(0);
4332       assert(N > 1 && "Create is expected to be between 2-4");
4333       ScalableVectorType *RetTy =
4334           dyn_cast<ScalableVectorType>(F->getReturnType());
4335       Value *Ret = llvm::PoisonValue::get(RetTy);
4336       unsigned MinElts = RetTy->getMinNumElements() / N;
4337       for (unsigned I = 0; I < N; I++) {
4338         Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4339         Value *V = CI->getArgOperand(I);
4340         Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4341       }
4342       NewCall = dyn_cast<CallInst>(Ret);
4343     }
4344     break;
4345   }
4346 
4347   case Intrinsic::arm_neon_bfdot:
4348   case Intrinsic::arm_neon_bfmmla:
4349   case Intrinsic::arm_neon_bfmlalb:
4350   case Intrinsic::arm_neon_bfmlalt:
4351   case Intrinsic::aarch64_neon_bfdot:
4352   case Intrinsic::aarch64_neon_bfmmla:
4353   case Intrinsic::aarch64_neon_bfmlalb:
4354   case Intrinsic::aarch64_neon_bfmlalt: {
4355     SmallVector<Value *, 3> Args;
4356     assert(CI->arg_size() == 3 &&
4357            "Mismatch between function args and call args");
4358     size_t OperandWidth =
4359         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4360     assert((OperandWidth == 64 || OperandWidth == 128) &&
4361            "Unexpected operand width");
4362     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4363     auto Iter = CI->args().begin();
4364     Args.push_back(*Iter++);
4365     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4366     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4367     NewCall = Builder.CreateCall(NewFn, Args);
4368     break;
4369   }
4370 
4371   case Intrinsic::bitreverse:
4372     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4373     break;
4374 
4375   case Intrinsic::ctlz:
4376   case Intrinsic::cttz:
4377     assert(CI->arg_size() == 1 &&
4378            "Mismatch between function args and call args");
4379     NewCall =
4380         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4381     break;
4382 
4383   case Intrinsic::objectsize: {
4384     Value *NullIsUnknownSize =
4385         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4386     Value *Dynamic =
4387         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4388     NewCall = Builder.CreateCall(
4389         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4390     break;
4391   }
4392 
4393   case Intrinsic::ctpop:
4394     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4395     break;
4396 
4397   case Intrinsic::convert_from_fp16:
4398     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4399     break;
4400 
4401   case Intrinsic::dbg_value: {
4402     StringRef Name = F->getName();
4403     Name = Name.substr(5); // Strip llvm.
4404     // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4405     if (Name.startswith("dbg.addr")) {
4406       DIExpression *Expr = cast<DIExpression>(
4407           cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4408       Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4409       NewCall =
4410           Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4411                                      MetadataAsValue::get(C, Expr)});
4412       break;
4413     }
4414 
4415     // Upgrade from the old version that had an extra offset argument.
4416     assert(CI->arg_size() == 4);
4417     // Drop nonzero offsets instead of attempting to upgrade them.
4418     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4419       if (Offset->isZeroValue()) {
4420         NewCall = Builder.CreateCall(
4421             NewFn,
4422             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4423         break;
4424       }
4425     CI->eraseFromParent();
4426     return;
4427   }
4428 
4429   case Intrinsic::ptr_annotation:
4430     // Upgrade from versions that lacked the annotation attribute argument.
4431     if (CI->arg_size() != 4) {
4432       DefaultCase();
4433       return;
4434     }
4435 
4436     // Create a new call with an added null annotation attribute argument.
4437     NewCall = Builder.CreateCall(
4438         NewFn,
4439         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4440          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
4441     NewCall->takeName(CI);
4442     CI->replaceAllUsesWith(NewCall);
4443     CI->eraseFromParent();
4444     return;
4445 
4446   case Intrinsic::var_annotation:
4447     // Upgrade from versions that lacked the annotation attribute argument.
4448     if (CI->arg_size() != 4) {
4449       DefaultCase();
4450       return;
4451     }
4452     // Create a new call with an added null annotation attribute argument.
4453     NewCall = Builder.CreateCall(
4454         NewFn,
4455         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4456          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
4457     NewCall->takeName(CI);
4458     CI->replaceAllUsesWith(NewCall);
4459     CI->eraseFromParent();
4460     return;
4461 
4462   case Intrinsic::riscv_aes32dsi:
4463   case Intrinsic::riscv_aes32dsmi:
4464   case Intrinsic::riscv_aes32esi:
4465   case Intrinsic::riscv_aes32esmi:
4466   case Intrinsic::riscv_sm4ks:
4467   case Intrinsic::riscv_sm4ed: {
4468     // The last argument to these intrinsics used to be i8 and changed to i32.
4469     // The type overload for sm4ks and sm4ed was removed.
4470     Value *Arg2 = CI->getArgOperand(2);
4471     if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4472       return;
4473 
4474     Value *Arg0 = CI->getArgOperand(0);
4475     Value *Arg1 = CI->getArgOperand(1);
4476     if (CI->getType()->isIntegerTy(64)) {
4477       Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4478       Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4479     }
4480 
4481     Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4482                             cast<ConstantInt>(Arg2)->getZExtValue());
4483 
4484     NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4485     Value *Res = NewCall;
4486     if (Res->getType() != CI->getType())
4487       Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4488     NewCall->takeName(CI);
4489     CI->replaceAllUsesWith(Res);
4490     CI->eraseFromParent();
4491     return;
4492   }
4493   case Intrinsic::riscv_sha256sig0:
4494   case Intrinsic::riscv_sha256sig1:
4495   case Intrinsic::riscv_sha256sum0:
4496   case Intrinsic::riscv_sha256sum1:
4497   case Intrinsic::riscv_sm3p0:
4498   case Intrinsic::riscv_sm3p1: {
4499     // The last argument to these intrinsics used to be i8 and changed to i32.
4500     // The type overload for sm4ks and sm4ed was removed.
4501     if (!CI->getType()->isIntegerTy(64))
4502       return;
4503 
4504     Value *Arg =
4505         Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4506 
4507     NewCall = Builder.CreateCall(NewFn, Arg);
4508     Value *Res =
4509         Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4510     NewCall->takeName(CI);
4511     CI->replaceAllUsesWith(Res);
4512     CI->eraseFromParent();
4513     return;
4514   }
4515 
4516   case Intrinsic::x86_xop_vfrcz_ss:
4517   case Intrinsic::x86_xop_vfrcz_sd:
4518     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4519     break;
4520 
4521   case Intrinsic::x86_xop_vpermil2pd:
4522   case Intrinsic::x86_xop_vpermil2ps:
4523   case Intrinsic::x86_xop_vpermil2pd_256:
4524   case Intrinsic::x86_xop_vpermil2ps_256: {
4525     SmallVector<Value *, 4> Args(CI->args());
4526     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4527     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4528     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4529     NewCall = Builder.CreateCall(NewFn, Args);
4530     break;
4531   }
4532 
4533   case Intrinsic::x86_sse41_ptestc:
4534   case Intrinsic::x86_sse41_ptestz:
4535   case Intrinsic::x86_sse41_ptestnzc: {
4536     // The arguments for these intrinsics used to be v4f32, and changed
4537     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4538     // So, the only thing required is a bitcast for both arguments.
4539     // First, check the arguments have the old type.
4540     Value *Arg0 = CI->getArgOperand(0);
4541     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4542       return;
4543 
4544     // Old intrinsic, add bitcasts
4545     Value *Arg1 = CI->getArgOperand(1);
4546 
4547     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4548 
4549     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4550     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4551 
4552     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4553     break;
4554   }
4555 
4556   case Intrinsic::x86_rdtscp: {
4557     // This used to take 1 arguments. If we have no arguments, it is already
4558     // upgraded.
4559     if (CI->getNumOperands() == 0)
4560       return;
4561 
4562     NewCall = Builder.CreateCall(NewFn);
4563     // Extract the second result and store it.
4564     Value *Data = Builder.CreateExtractValue(NewCall, 1);
4565     // Cast the pointer to the right type.
4566     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4567                                  llvm::PointerType::getUnqual(Data->getType()));
4568     Builder.CreateAlignedStore(Data, Ptr, Align(1));
4569     // Replace the original call result with the first result of the new call.
4570     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4571 
4572     NewCall->takeName(CI);
4573     CI->replaceAllUsesWith(TSC);
4574     CI->eraseFromParent();
4575     return;
4576   }
4577 
4578   case Intrinsic::x86_sse41_insertps:
4579   case Intrinsic::x86_sse41_dppd:
4580   case Intrinsic::x86_sse41_dpps:
4581   case Intrinsic::x86_sse41_mpsadbw:
4582   case Intrinsic::x86_avx_dp_ps_256:
4583   case Intrinsic::x86_avx2_mpsadbw: {
4584     // Need to truncate the last argument from i32 to i8 -- this argument models
4585     // an inherently 8-bit immediate operand to these x86 instructions.
4586     SmallVector<Value *, 4> Args(CI->args());
4587 
4588     // Replace the last argument with a trunc.
4589     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4590     NewCall = Builder.CreateCall(NewFn, Args);
4591     break;
4592   }
4593 
4594   case Intrinsic::x86_avx512_mask_cmp_pd_128:
4595   case Intrinsic::x86_avx512_mask_cmp_pd_256:
4596   case Intrinsic::x86_avx512_mask_cmp_pd_512:
4597   case Intrinsic::x86_avx512_mask_cmp_ps_128:
4598   case Intrinsic::x86_avx512_mask_cmp_ps_256:
4599   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4600     SmallVector<Value *, 4> Args(CI->args());
4601     unsigned NumElts =
4602         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4603     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4604 
4605     NewCall = Builder.CreateCall(NewFn, Args);
4606     Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4607 
4608     NewCall->takeName(CI);
4609     CI->replaceAllUsesWith(Res);
4610     CI->eraseFromParent();
4611     return;
4612   }
4613 
4614   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4615   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4616   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4617   case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4618   case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4619   case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4620     SmallVector<Value *, 4> Args(CI->args());
4621     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4622     if (NewFn->getIntrinsicID() ==
4623         Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4624       Args[1] = Builder.CreateBitCast(
4625           Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4626 
4627     NewCall = Builder.CreateCall(NewFn, Args);
4628     Value *Res = Builder.CreateBitCast(
4629         NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4630 
4631     NewCall->takeName(CI);
4632     CI->replaceAllUsesWith(Res);
4633     CI->eraseFromParent();
4634     return;
4635   }
4636   case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4637   case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4638   case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4639     SmallVector<Value *, 4> Args(CI->args());
4640     unsigned NumElts =
4641         cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4642     Args[1] = Builder.CreateBitCast(
4643         Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4644     Args[2] = Builder.CreateBitCast(
4645         Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4646 
4647     NewCall = Builder.CreateCall(NewFn, Args);
4648     break;
4649   }
4650 
4651   case Intrinsic::thread_pointer: {
4652     NewCall = Builder.CreateCall(NewFn, {});
4653     break;
4654   }
4655 
4656   case Intrinsic::invariant_start:
4657   case Intrinsic::invariant_end: {
4658     SmallVector<Value *, 4> Args(CI->args());
4659     NewCall = Builder.CreateCall(NewFn, Args);
4660     break;
4661   }
4662   case Intrinsic::masked_load:
4663   case Intrinsic::masked_store:
4664   case Intrinsic::masked_gather:
4665   case Intrinsic::masked_scatter: {
4666     SmallVector<Value *, 4> Args(CI->args());
4667     NewCall = Builder.CreateCall(NewFn, Args);
4668     NewCall->copyMetadata(*CI);
4669     break;
4670   }
4671 
4672   case Intrinsic::memcpy:
4673   case Intrinsic::memmove:
4674   case Intrinsic::memset: {
4675     // We have to make sure that the call signature is what we're expecting.
4676     // We only want to change the old signatures by removing the alignment arg:
4677     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4678     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4679     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4680     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4681     // Note: i8*'s in the above can be any pointer type
4682     if (CI->arg_size() != 5) {
4683       DefaultCase();
4684       return;
4685     }
4686     // Remove alignment argument (3), and add alignment attributes to the
4687     // dest/src pointers.
4688     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4689                       CI->getArgOperand(2), CI->getArgOperand(4)};
4690     NewCall = Builder.CreateCall(NewFn, Args);
4691     AttributeList OldAttrs = CI->getAttributes();
4692     AttributeList NewAttrs = AttributeList::get(
4693         C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4694         {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4695          OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4696     NewCall->setAttributes(NewAttrs);
4697     auto *MemCI = cast<MemIntrinsic>(NewCall);
4698     // All mem intrinsics support dest alignment.
4699     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4700     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4701     // Memcpy/Memmove also support source alignment.
4702     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4703       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4704     break;
4705   }
4706   }
4707   assert(NewCall && "Should have either set this variable or returned through "
4708                     "the default case");
4709   NewCall->takeName(CI);
4710   CI->replaceAllUsesWith(NewCall);
4711   CI->eraseFromParent();
4712 }
4713 
4714 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4715   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4716 
4717   // Check if this function should be upgraded and get the replacement function
4718   // if there is one.
4719   Function *NewFn;
4720   if (UpgradeIntrinsicFunction(F, NewFn)) {
4721     // Replace all users of the old function with the new function or new
4722     // instructions. This is not a range loop because the call is deleted.
4723     for (User *U : make_early_inc_range(F->users()))
4724       if (CallBase *CB = dyn_cast<CallBase>(U))
4725         UpgradeIntrinsicCall(CB, NewFn);
4726 
4727     // Remove old function, no longer used, from the module.
4728     F->eraseFromParent();
4729   }
4730 }
4731 
4732 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4733   const unsigned NumOperands = MD.getNumOperands();
4734   if (NumOperands == 0)
4735     return &MD; // Invalid, punt to a verifier error.
4736 
4737   // Check if the tag uses struct-path aware TBAA format.
4738   if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4739     return &MD;
4740 
4741   auto &Context = MD.getContext();
4742   if (NumOperands == 3) {
4743     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4744     MDNode *ScalarType = MDNode::get(Context, Elts);
4745     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4746     Metadata *Elts2[] = {ScalarType, ScalarType,
4747                          ConstantAsMetadata::get(
4748                              Constant::getNullValue(Type::getInt64Ty(Context))),
4749                          MD.getOperand(2)};
4750     return MDNode::get(Context, Elts2);
4751   }
4752   // Create a MDNode <MD, MD, offset 0>
4753   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4754                                     Type::getInt64Ty(Context)))};
4755   return MDNode::get(Context, Elts);
4756 }
4757 
4758 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4759                                       Instruction *&Temp) {
4760   if (Opc != Instruction::BitCast)
4761     return nullptr;
4762 
4763   Temp = nullptr;
4764   Type *SrcTy = V->getType();
4765   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4766       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4767     LLVMContext &Context = V->getContext();
4768 
4769     // We have no information about target data layout, so we assume that
4770     // the maximum pointer size is 64bit.
4771     Type *MidTy = Type::getInt64Ty(Context);
4772     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4773 
4774     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4775   }
4776 
4777   return nullptr;
4778 }
4779 
4780 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4781   if (Opc != Instruction::BitCast)
4782     return nullptr;
4783 
4784   Type *SrcTy = C->getType();
4785   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4786       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4787     LLVMContext &Context = C->getContext();
4788 
4789     // We have no information about target data layout, so we assume that
4790     // the maximum pointer size is 64bit.
4791     Type *MidTy = Type::getInt64Ty(Context);
4792 
4793     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4794                                      DestTy);
4795   }
4796 
4797   return nullptr;
4798 }
4799 
4800 /// Check the debug info version number, if it is out-dated, drop the debug
4801 /// info. Return true if module is modified.
4802 bool llvm::UpgradeDebugInfo(Module &M) {
4803   if (DisableAutoUpgradeDebugInfo)
4804     return false;
4805 
4806   unsigned Version = getDebugMetadataVersionFromModule(M);
4807   if (Version == DEBUG_METADATA_VERSION) {
4808     bool BrokenDebugInfo = false;
4809     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4810       report_fatal_error("Broken module found, compilation aborted!");
4811     if (!BrokenDebugInfo)
4812       // Everything is ok.
4813       return false;
4814     else {
4815       // Diagnose malformed debug info.
4816       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4817       M.getContext().diagnose(Diag);
4818     }
4819   }
4820   bool Modified = StripDebugInfo(M);
4821   if (Modified && Version != DEBUG_METADATA_VERSION) {
4822     // Diagnose a version mismatch.
4823     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4824     M.getContext().diagnose(DiagVersion);
4825   }
4826   return Modified;
4827 }
4828 
4829 /// This checks for objc retain release marker which should be upgraded. It
4830 /// returns true if module is modified.
4831 static bool UpgradeRetainReleaseMarker(Module &M) {
4832   bool Changed = false;
4833   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4834   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4835   if (ModRetainReleaseMarker) {
4836     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4837     if (Op) {
4838       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4839       if (ID) {
4840         SmallVector<StringRef, 4> ValueComp;
4841         ID->getString().split(ValueComp, "#");
4842         if (ValueComp.size() == 2) {
4843           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4844           ID = MDString::get(M.getContext(), NewValue);
4845         }
4846         M.addModuleFlag(Module::Error, MarkerKey, ID);
4847         M.eraseNamedMetadata(ModRetainReleaseMarker);
4848         Changed = true;
4849       }
4850     }
4851   }
4852   return Changed;
4853 }
4854 
4855 void llvm::UpgradeARCRuntime(Module &M) {
4856   // This lambda converts normal function calls to ARC runtime functions to
4857   // intrinsic calls.
4858   auto UpgradeToIntrinsic = [&](const char *OldFunc,
4859                                 llvm::Intrinsic::ID IntrinsicFunc) {
4860     Function *Fn = M.getFunction(OldFunc);
4861 
4862     if (!Fn)
4863       return;
4864 
4865     Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4866 
4867     for (User *U : make_early_inc_range(Fn->users())) {
4868       CallInst *CI = dyn_cast<CallInst>(U);
4869       if (!CI || CI->getCalledFunction() != Fn)
4870         continue;
4871 
4872       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4873       FunctionType *NewFuncTy = NewFn->getFunctionType();
4874       SmallVector<Value *, 2> Args;
4875 
4876       // Don't upgrade the intrinsic if it's not valid to bitcast the return
4877       // value to the return type of the old function.
4878       if (NewFuncTy->getReturnType() != CI->getType() &&
4879           !CastInst::castIsValid(Instruction::BitCast, CI,
4880                                  NewFuncTy->getReturnType()))
4881         continue;
4882 
4883       bool InvalidCast = false;
4884 
4885       for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4886         Value *Arg = CI->getArgOperand(I);
4887 
4888         // Bitcast argument to the parameter type of the new function if it's
4889         // not a variadic argument.
4890         if (I < NewFuncTy->getNumParams()) {
4891           // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4892           // to the parameter type of the new function.
4893           if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4894                                      NewFuncTy->getParamType(I))) {
4895             InvalidCast = true;
4896             break;
4897           }
4898           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4899         }
4900         Args.push_back(Arg);
4901       }
4902 
4903       if (InvalidCast)
4904         continue;
4905 
4906       // Create a call instruction that calls the new function.
4907       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4908       NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4909       NewCall->takeName(CI);
4910 
4911       // Bitcast the return value back to the type of the old call.
4912       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4913 
4914       if (!CI->use_empty())
4915         CI->replaceAllUsesWith(NewRetVal);
4916       CI->eraseFromParent();
4917     }
4918 
4919     if (Fn->use_empty())
4920       Fn->eraseFromParent();
4921   };
4922 
4923   // Unconditionally convert a call to "clang.arc.use" to a call to
4924   // "llvm.objc.clang.arc.use".
4925   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4926 
4927   // Upgrade the retain release marker. If there is no need to upgrade
4928   // the marker, that means either the module is already new enough to contain
4929   // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4930   if (!UpgradeRetainReleaseMarker(M))
4931     return;
4932 
4933   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4934       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4935       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4936       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4937       {"objc_autoreleaseReturnValue",
4938        llvm::Intrinsic::objc_autoreleaseReturnValue},
4939       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4940       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4941       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4942       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4943       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4944       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4945       {"objc_release", llvm::Intrinsic::objc_release},
4946       {"objc_retain", llvm::Intrinsic::objc_retain},
4947       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4948       {"objc_retainAutoreleaseReturnValue",
4949        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4950       {"objc_retainAutoreleasedReturnValue",
4951        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4952       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4953       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4954       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4955       {"objc_unsafeClaimAutoreleasedReturnValue",
4956        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4957       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4958       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4959       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4960       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4961       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4962       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4963       {"objc_arc_annotation_topdown_bbstart",
4964        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4965       {"objc_arc_annotation_topdown_bbend",
4966        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4967       {"objc_arc_annotation_bottomup_bbstart",
4968        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4969       {"objc_arc_annotation_bottomup_bbend",
4970        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4971 
4972   for (auto &I : RuntimeFuncs)
4973     UpgradeToIntrinsic(I.first, I.second);
4974 }
4975 
4976 bool llvm::UpgradeModuleFlags(Module &M) {
4977   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4978   if (!ModFlags)
4979     return false;
4980 
4981   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4982   bool HasSwiftVersionFlag = false;
4983   uint8_t SwiftMajorVersion, SwiftMinorVersion;
4984   uint32_t SwiftABIVersion;
4985   auto Int8Ty = Type::getInt8Ty(M.getContext());
4986   auto Int32Ty = Type::getInt32Ty(M.getContext());
4987 
4988   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4989     MDNode *Op = ModFlags->getOperand(I);
4990     if (Op->getNumOperands() != 3)
4991       continue;
4992     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4993     if (!ID)
4994       continue;
4995     auto SetBehavior = [&](Module::ModFlagBehavior B) {
4996       Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
4997                               Type::getInt32Ty(M.getContext()), B)),
4998                           MDString::get(M.getContext(), ID->getString()),
4999                           Op->getOperand(2)};
5000       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5001       Changed = true;
5002     };
5003 
5004     if (ID->getString() == "Objective-C Image Info Version")
5005       HasObjCFlag = true;
5006     if (ID->getString() == "Objective-C Class Properties")
5007       HasClassProperties = true;
5008     // Upgrade PIC from Error/Max to Min.
5009     if (ID->getString() == "PIC Level") {
5010       if (auto *Behavior =
5011               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5012         uint64_t V = Behavior->getLimitedValue();
5013         if (V == Module::Error || V == Module::Max)
5014           SetBehavior(Module::Min);
5015       }
5016     }
5017     // Upgrade "PIE Level" from Error to Max.
5018     if (ID->getString() == "PIE Level")
5019       if (auto *Behavior =
5020               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5021         if (Behavior->getLimitedValue() == Module::Error)
5022           SetBehavior(Module::Max);
5023 
5024     // Upgrade branch protection and return address signing module flags. The
5025     // module flag behavior for these fields were Error and now they are Min.
5026     if (ID->getString() == "branch-target-enforcement" ||
5027         ID->getString().startswith("sign-return-address")) {
5028       if (auto *Behavior =
5029               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5030         if (Behavior->getLimitedValue() == Module::Error) {
5031           Type *Int32Ty = Type::getInt32Ty(M.getContext());
5032           Metadata *Ops[3] = {
5033               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5034               Op->getOperand(1), Op->getOperand(2)};
5035           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5036           Changed = true;
5037         }
5038       }
5039     }
5040 
5041     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
5042     // section name so that llvm-lto will not complain about mismatching
5043     // module flags that is functionally the same.
5044     if (ID->getString() == "Objective-C Image Info Section") {
5045       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5046         SmallVector<StringRef, 4> ValueComp;
5047         Value->getString().split(ValueComp, " ");
5048         if (ValueComp.size() != 1) {
5049           std::string NewValue;
5050           for (auto &S : ValueComp)
5051             NewValue += S.str();
5052           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5053                               MDString::get(M.getContext(), NewValue)};
5054           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5055           Changed = true;
5056         }
5057       }
5058     }
5059 
5060     // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5061     // If the higher bits are set, it adds new module flag for swift info.
5062     if (ID->getString() == "Objective-C Garbage Collection") {
5063       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5064       if (Md) {
5065         assert(Md->getValue() && "Expected non-empty metadata");
5066         auto Type = Md->getValue()->getType();
5067         if (Type == Int8Ty)
5068           continue;
5069         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5070         if ((Val & 0xff) != Val) {
5071           HasSwiftVersionFlag = true;
5072           SwiftABIVersion = (Val & 0xff00) >> 8;
5073           SwiftMajorVersion = (Val & 0xff000000) >> 24;
5074           SwiftMinorVersion = (Val & 0xff0000) >> 16;
5075         }
5076         Metadata *Ops[3] = {
5077           ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5078           Op->getOperand(1),
5079           ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5080         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5081         Changed = true;
5082       }
5083     }
5084   }
5085 
5086   // "Objective-C Class Properties" is recently added for Objective-C. We
5087   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5088   // flag of value 0, so we can correclty downgrade this flag when trying to
5089   // link an ObjC bitcode without this module flag with an ObjC bitcode with
5090   // this module flag.
5091   if (HasObjCFlag && !HasClassProperties) {
5092     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5093                     (uint32_t)0);
5094     Changed = true;
5095   }
5096 
5097   if (HasSwiftVersionFlag) {
5098     M.addModuleFlag(Module::Error, "Swift ABI Version",
5099                     SwiftABIVersion);
5100     M.addModuleFlag(Module::Error, "Swift Major Version",
5101                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
5102     M.addModuleFlag(Module::Error, "Swift Minor Version",
5103                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
5104     Changed = true;
5105   }
5106 
5107   return Changed;
5108 }
5109 
5110 void llvm::UpgradeSectionAttributes(Module &M) {
5111   auto TrimSpaces = [](StringRef Section) -> std::string {
5112     SmallVector<StringRef, 5> Components;
5113     Section.split(Components, ',');
5114 
5115     SmallString<32> Buffer;
5116     raw_svector_ostream OS(Buffer);
5117 
5118     for (auto Component : Components)
5119       OS << ',' << Component.trim();
5120 
5121     return std::string(OS.str().substr(1));
5122   };
5123 
5124   for (auto &GV : M.globals()) {
5125     if (!GV.hasSection())
5126       continue;
5127 
5128     StringRef Section = GV.getSection();
5129 
5130     if (!Section.startswith("__DATA, __objc_catlist"))
5131       continue;
5132 
5133     // __DATA, __objc_catlist, regular, no_dead_strip
5134     // __DATA,__objc_catlist,regular,no_dead_strip
5135     GV.setSection(TrimSpaces(Section));
5136   }
5137 }
5138 
5139 namespace {
5140 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5141 // callsites within a function that did not also have the strictfp attribute.
5142 // Since 10.0, if strict FP semantics are needed within a function, the
5143 // function must have the strictfp attribute and all calls within the function
5144 // must also have the strictfp attribute. This latter restriction is
5145 // necessary to prevent unwanted libcall simplification when a function is
5146 // being cloned (such as for inlining).
5147 //
5148 // The "dangling" strictfp attribute usage was only used to prevent constant
5149 // folding and other libcall simplification. The nobuiltin attribute on the
5150 // callsite has the same effect.
5151 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5152   StrictFPUpgradeVisitor() = default;
5153 
5154   void visitCallBase(CallBase &Call) {
5155     if (!Call.isStrictFP())
5156       return;
5157     if (isa<ConstrainedFPIntrinsic>(&Call))
5158       return;
5159     // If we get here, the caller doesn't have the strictfp attribute
5160     // but this callsite does. Replace the strictfp attribute with nobuiltin.
5161     Call.removeFnAttr(Attribute::StrictFP);
5162     Call.addFnAttr(Attribute::NoBuiltin);
5163   }
5164 };
5165 } // namespace
5166 
5167 void llvm::UpgradeFunctionAttributes(Function &F) {
5168   // If a function definition doesn't have the strictfp attribute,
5169   // convert any callsite strictfp attributes to nobuiltin.
5170   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5171     StrictFPUpgradeVisitor SFPV;
5172     SFPV.visit(F);
5173   }
5174 
5175   // Remove all incompatibile attributes from function.
5176   F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5177   for (auto &Arg : F.args())
5178     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5179 }
5180 
5181 static bool isOldLoopArgument(Metadata *MD) {
5182   auto *T = dyn_cast_or_null<MDTuple>(MD);
5183   if (!T)
5184     return false;
5185   if (T->getNumOperands() < 1)
5186     return false;
5187   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5188   if (!S)
5189     return false;
5190   return S->getString().startswith("llvm.vectorizer.");
5191 }
5192 
5193 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5194   StringRef OldPrefix = "llvm.vectorizer.";
5195   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
5196 
5197   if (OldTag == "llvm.vectorizer.unroll")
5198     return MDString::get(C, "llvm.loop.interleave.count");
5199 
5200   return MDString::get(
5201       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5202              .str());
5203 }
5204 
5205 static Metadata *upgradeLoopArgument(Metadata *MD) {
5206   auto *T = dyn_cast_or_null<MDTuple>(MD);
5207   if (!T)
5208     return MD;
5209   if (T->getNumOperands() < 1)
5210     return MD;
5211   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5212   if (!OldTag)
5213     return MD;
5214   if (!OldTag->getString().startswith("llvm.vectorizer."))
5215     return MD;
5216 
5217   // This has an old tag.  Upgrade it.
5218   SmallVector<Metadata *, 8> Ops;
5219   Ops.reserve(T->getNumOperands());
5220   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5221   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5222     Ops.push_back(T->getOperand(I));
5223 
5224   return MDTuple::get(T->getContext(), Ops);
5225 }
5226 
5227 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5228   auto *T = dyn_cast<MDTuple>(&N);
5229   if (!T)
5230     return &N;
5231 
5232   if (none_of(T->operands(), isOldLoopArgument))
5233     return &N;
5234 
5235   SmallVector<Metadata *, 8> Ops;
5236   Ops.reserve(T->getNumOperands());
5237   for (Metadata *MD : T->operands())
5238     Ops.push_back(upgradeLoopArgument(MD));
5239 
5240   return MDTuple::get(T->getContext(), Ops);
5241 }
5242 
5243 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5244   Triple T(TT);
5245   // The only data layout upgrades needed for pre-GCN are setting the address
5246   // space of globals to 1.
5247   if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
5248       !DL.startswith("G")) {
5249     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5250   }
5251 
5252   if (T.isRISCV64()) {
5253     // Make i32 a native type for 64-bit RISC-V.
5254     auto I = DL.find("-n64-");
5255     if (I != StringRef::npos)
5256       return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5257     return DL.str();
5258   }
5259 
5260   std::string Res = DL.str();
5261   // AMDGCN data layout upgrades.
5262   if (T.isAMDGCN()) {
5263     // Define address spaces for constants.
5264     if (!DL.contains("-G") && !DL.starts_with("G"))
5265       Res.append(Res.empty() ? "G1" : "-G1");
5266 
5267     // Add missing non-integral declarations.
5268     // This goes before adding new address spaces to prevent incoherent string
5269     // values.
5270     if (!DL.contains("-ni") && !DL.startswith("ni"))
5271       Res.append("-ni:7:8");
5272     // Update ni:7 to ni:7:8.
5273     if (DL.ends_with("ni:7"))
5274       Res.append(":8");
5275 
5276     // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5277     // resources) An empty data layout has already been upgraded to G1 by now.
5278     if (!DL.contains("-p7") && !DL.startswith("p7"))
5279       Res.append("-p7:160:256:256:32");
5280     if (!DL.contains("-p8") && !DL.startswith("p8"))
5281       Res.append("-p8:128:128");
5282 
5283     return Res;
5284   }
5285 
5286   if (!T.isX86())
5287     return Res;
5288 
5289   // If the datalayout matches the expected format, add pointer size address
5290   // spaces to the datalayout.
5291   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5292   if (!DL.contains(AddrSpaces)) {
5293     SmallVector<StringRef, 4> Groups;
5294     Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5295     if (R.match(DL, &Groups))
5296       Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5297   }
5298 
5299   // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5300   // Raising the alignment is safe because Clang did not produce f80 values in
5301   // the MSVC environment before this upgrade was added.
5302   if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5303     StringRef Ref = Res;
5304     auto I = Ref.find("-f80:32-");
5305     if (I != StringRef::npos)
5306       Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5307   }
5308 
5309   return Res;
5310 }
5311 
5312 void llvm::UpgradeAttributes(AttrBuilder &B) {
5313   StringRef FramePointer;
5314   Attribute A = B.getAttribute("no-frame-pointer-elim");
5315   if (A.isValid()) {
5316     // The value can be "true" or "false".
5317     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5318     B.removeAttribute("no-frame-pointer-elim");
5319   }
5320   if (B.contains("no-frame-pointer-elim-non-leaf")) {
5321     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5322     if (FramePointer != "all")
5323       FramePointer = "non-leaf";
5324     B.removeAttribute("no-frame-pointer-elim-non-leaf");
5325   }
5326   if (!FramePointer.empty())
5327     B.addAttribute("frame-pointer", FramePointer);
5328 
5329   A = B.getAttribute("null-pointer-is-valid");
5330   if (A.isValid()) {
5331     // The value can be "true" or "false".
5332     bool NullPointerIsValid = A.getValueAsString() == "true";
5333     B.removeAttribute("null-pointer-is-valid");
5334     if (NullPointerIsValid)
5335       B.addAttribute(Attribute::NullPointerIsValid);
5336   }
5337 }
5338 
5339 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5340   // clang.arc.attachedcall bundles are now required to have an operand.
5341   // If they don't, it's okay to drop them entirely: when there is an operand,
5342   // the "attachedcall" is meaningful and required, but without an operand,
5343   // it's just a marker NOP.  Dropping it merely prevents an optimization.
5344   erase_if(Bundles, [&](OperandBundleDef &OBD) {
5345     return OBD.getTag() == "clang.arc.attachedcall" &&
5346            OBD.inputs().empty();
5347   });
5348 }
5349