xref: /freebsd/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/BinaryFormat/Dwarf.h"
19 #include "llvm/IR/AttributeMask.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstVisitor.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/IntrinsicsAArch64.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/IntrinsicsNVPTX.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/IR/IntrinsicsWebAssembly.h"
35 #include "llvm/IR/IntrinsicsX86.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Verifier.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/Regex.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include <cstring>
45 
46 using namespace llvm;
47 
// Command-line escape hatch: when set, debug-info auto-upgrading is skipped
// (consumed by the debug-info upgrade path elsewhere in this file).
static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
51 
52 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
53 
54 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55 // changed their type from v4f32 to v2i64.
56 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
57                                   Function *&NewFn) {
58   // Check whether this is an old version of the function, which received
59   // v4f32 arguments.
60   Type *Arg0Type = F->getFunctionType()->getParamType(0);
61   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62     return false;
63 
64   // Yes, it's old, replace it with new version.
65   rename(F);
66   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67   return true;
68 }
69 
70 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71 // arguments have changed their type from i32 to i8.
72 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
73                                              Function *&NewFn) {
74   // Check that the last argument is an i32.
75   Type *LastArgType = F->getFunctionType()->getParamType(
76      F->getFunctionType()->getNumParams() - 1);
77   if (!LastArgType->isIntegerTy(32))
78     return false;
79 
80   // Move this function aside and map down.
81   rename(F);
82   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83   return true;
84 }
85 
86 // Upgrade the declaration of fp compare intrinsics that change return type
87 // from scalar to vXi1 mask.
88 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
89                                       Function *&NewFn) {
90   // Check if the return type is a vector.
91   if (F->getReturnType()->isVectorTy())
92     return false;
93 
94   rename(F);
95   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96   return true;
97 }
98 
99 static bool UpgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
100                                     Function *&NewFn) {
101   if (F->getReturnType()->getScalarType()->isBFloatTy())
102     return false;
103 
104   rename(F);
105   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106   return true;
107 }
108 
109 static bool UpgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
110                                       Function *&NewFn) {
111   if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112     return false;
113 
114   rename(F);
115   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116   return true;
117 }
118 
// Returns true if Name identifies a legacy x86 intrinsic that is auto-upgraded
// entirely at its call sites (the caller then leaves NewFn null). Name arrives
// with the "x86." prefix already stripped by UpgradeX86IntrinsicFunction, and
// consume_front() progressively strips the sub-prefix ("avx.", "avx512.mask.",
// ...) before the per-family matching below.
// NOTE(review): the Function *F parameter is currently unused; the decision is
// made purely from the intrinsic name.
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||        // Added in 3.9
              Name.starts_with("andn.") ||       // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||           // Added in 5.0
              Name.starts_with("cmp.d") ||           // Added in 5.0
              Name.starts_with("cmp.q") ||           // Added in 5.0
              Name.starts_with("cmp.w") ||           // Added in 5.0
              Name.starts_with("compress.b") ||      // Added in 9.0
              Name.starts_with("compress.d") ||      // Added in 9.0
              Name.starts_with("compress.p") ||      // Added in 9.0
              Name.starts_with("compress.q") ||      // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||      // Added in 9.0
              Name.starts_with("conflict.") ||       // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||              // Added in 7.0
              Name == "cvtpd2ps.256" ||              // Added in 7.0
              Name == "cvtps2pd.128" ||              // Added in 7.0
              Name == "cvtps2pd.256" ||              // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||              // Added in 9.0
              Name == "cvtqq2ps.512" ||              // Added in 9.0
              Name == "cvttpd2dq.256" ||             // Added in 7.0
              Name == "cvttps2dq.128" ||             // Added in 7.0
              Name == "cvttps2dq.256" ||             // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||             // Added in 9.0
              Name == "cvtuqq2ps.512" ||             // Added in 9.0
              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
              Name.starts_with("div.p") ||    // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") ||     // Added in 9.0
              Name.starts_with("expand.q") ||     // Added in 9.0
              Name.starts_with("expand.w") ||     // Added in 9.0
              Name.starts_with("fpclass.p") ||    // Added in 7.0
              Name.starts_with("insert") ||       // Added in 4.0
              Name.starts_with("load.") ||        // Added in 3.9
              Name.starts_with("loadu.") ||       // Added in 3.9
              Name.starts_with("lzcnt.") ||       // Added in 5.0
              Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||     // Added in 3.9
              Name.starts_with("move.s") ||      // Added in 4.0
              Name.starts_with("movshdup") ||    // Added in 3.9
              Name.starts_with("movsldup") ||    // Added in 3.9
              Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||         // Added in 3.9
              Name.starts_with("pabs.") ||       // Added in 6.0
              Name.starts_with("packssdw.") ||   // Added in 5.0
              Name.starts_with("packsswb.") ||   // Added in 5.0
              Name.starts_with("packusdw.") ||   // Added in 5.0
              Name.starts_with("packuswb.") ||   // Added in 5.0
              Name.starts_with("padd.") ||       // Added in 4.0
              Name.starts_with("padds.") ||      // Added in 8.0
              Name.starts_with("paddus.") ||     // Added in 8.0
              Name.starts_with("palignr.") ||    // Added in 3.9
              Name.starts_with("pand.") ||       // Added in 3.9
              Name.starts_with("pandn.") ||      // Added in 3.9
              Name.starts_with("pavg") ||        // Added in 6.0
              Name.starts_with("pbroadcast") ||  // Added in 6.0
              Name.starts_with("pcmpeq.") ||     // Added in 3.9
              Name.starts_with("pcmpgt.") ||     // Added in 3.9
              Name.starts_with("perm.df.") ||    // Added in 3.9
              Name.starts_with("perm.di.") ||    // Added in 3.9
              Name.starts_with("permvar.") ||    // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") ||   // Added in 7.0
              Name.starts_with("pmax") ||        // Added in 4.0
              Name.starts_with("pmin") ||        // Added in 4.0
              Name == "pmov.qd.256" ||           // Added in 9.0
              Name == "pmov.qd.512" ||           // Added in 9.0
              Name == "pmov.wb.256" ||           // Added in 9.0
              Name == "pmov.wb.512" ||           // Added in 9.0
              Name.starts_with("pmovsx") ||      // Added in 4.0
              Name.starts_with("pmovzx") ||      // Added in 4.0
              Name.starts_with("pmul.dq.") ||    // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") ||    // Added in 7.0
              Name.starts_with("pmulhu.w.") ||   // Added in 7.0
              Name.starts_with("pmull.") ||      // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
              Name.starts_with("por.") ||            // Added in 3.9
              Name.starts_with("prol.") ||           // Added in 8.0
              Name.starts_with("prolv.") ||          // Added in 8.0
              Name.starts_with("pror.") ||           // Added in 8.0
              Name.starts_with("prorv.") ||          // Added in 8.0
              Name.starts_with("pshuf.b.") ||        // Added in 4.0
              Name.starts_with("pshuf.d.") ||        // Added in 3.9
              Name.starts_with("pshufh.w.") ||       // Added in 3.9
              Name.starts_with("pshufl.w.") ||       // Added in 3.9
              Name.starts_with("psll.d") ||          // Added in 4.0
              Name.starts_with("psll.q") ||          // Added in 4.0
              Name.starts_with("psll.w") ||          // Added in 4.0
              Name.starts_with("pslli") ||           // Added in 4.0
              Name.starts_with("psllv") ||           // Added in 4.0
              Name.starts_with("psra.d") ||          // Added in 4.0
              Name.starts_with("psra.q") ||          // Added in 4.0
              Name.starts_with("psra.w") ||          // Added in 4.0
              Name.starts_with("psrai") ||           // Added in 4.0
              Name.starts_with("psrav") ||           // Added in 4.0
              Name.starts_with("psrl.d") ||          // Added in 4.0
              Name.starts_with("psrl.q") ||          // Added in 4.0
              Name.starts_with("psrl.w") ||          // Added in 4.0
              Name.starts_with("psrli") ||           // Added in 4.0
              Name.starts_with("psrlv") ||           // Added in 4.0
              Name.starts_with("psub.") ||           // Added in 4.0
              Name.starts_with("psubs.") ||          // Added in 8.0
              Name.starts_with("psubus.") ||         // Added in 8.0
              Name.starts_with("pternlog.") ||       // Added in 7.0
              Name.starts_with("punpckh") ||         // Added in 3.9
              Name.starts_with("punpckl") ||         // Added in 3.9
              Name.starts_with("pxor.") ||           // Added in 3.9
              Name.starts_with("shuf.f") ||          // Added in 6.0
              Name.starts_with("shuf.i") ||          // Added in 6.0
              Name.starts_with("shuf.p") ||          // Added in 4.0
              Name.starts_with("sqrt.p") ||          // Added in 7.0
              Name.starts_with("store.b.") ||        // Added in 3.9
              Name.starts_with("store.d.") ||        // Added in 3.9
              Name.starts_with("store.p") ||         // Added in 3.9
              Name.starts_with("store.q.") ||        // Added in 3.9
              Name.starts_with("store.w.") ||        // Added in 3.9
              Name == "store.ss" ||                  // Added in 7.0
              Name.starts_with("storeu.") ||         // Added in 3.9
              Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||       // Added in 5.0
              Name.starts_with("unpckh.") ||     // Added in 3.9
              Name.starts_with("unpckl.") ||     // Added in 3.9
              Name.starts_with("valign.") ||     // Added in 4.0
              Name == "vcvtph2ps.128" ||         // Added in 11.0
              Name == "vcvtph2ps.256" ||         // Added in 11.0
              Name.starts_with("vextract") ||    // Added in 4.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vfnmadd.") ||    // Added in 7.0
              Name.starts_with("vfnmsub.") ||    // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") ||   // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshld.") ||     // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrd.") ||     // Added in 7.0
              Name.starts_with("vpshrdv.") ||    // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor."));           // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||               // Added in 5.0
            Name == "pmul.dq.512" ||            // Added in 7.0
            Name == "pmulu.dq.512" ||           // Added in 7.0
            Name.starts_with("broadcastm") ||   // Added in 6.0
            Name.starts_with("cmp.p") ||        // Added in 12.0
            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
            Name.starts_with("cvtmask2") ||     // Added in 5.0
            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
            Name == "cvtusi2sd" ||              // Added in 7.0
            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
            Name == "kand.w" ||                 // Added in 7.0
            Name == "kandn.w" ||                // Added in 7.0
            Name == "knot.w" ||                 // Added in 7.0
            Name == "kor.w" ||                  // Added in 7.0
            Name == "kortestc.w" ||             // Added in 7.0
            Name == "kortestz.w" ||             // Added in 7.0
            Name.starts_with("kunpck") ||       // added in 6.0
            Name == "kxnor.w" ||                // Added in 7.0
            Name == "kxor.w" ||                 // Added in 7.0
            Name.starts_with("padds.") ||       // Added in 8.0
            Name.starts_with("pbroadcast") ||   // Added in 3.9
            Name.starts_with("prol") ||         // Added in 8.0
            Name.starts_with("pror") ||         // Added in 8.0
            Name.starts_with("psll.dq") ||      // Added in 3.9
            Name.starts_with("psrl.dq") ||      // Added in 3.9
            Name.starts_with("psubs.") ||       // Added in 8.0
            Name.starts_with("ptestm") ||       // Added in 6.0
            Name.starts_with("ptestnm") ||      // Added in 6.0
            Name.starts_with("storent.") ||     // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") ||      // Added in 8.0
            Name.starts_with("vpshrd."));       // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||          // Added in 3.8
            Name == "vpcmov.256" ||      // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot"));  // Added in 8.0

  // No family prefix matched: the remaining upgradable names live directly
  // under "x86.".
  return (Name == "addcarry.u32" ||        // Added in 8.0
          Name == "addcarry.u64" ||        // Added in 8.0
          Name == "addcarryx.u32" ||       // Added in 8.0
          Name == "addcarryx.u64" ||       // Added in 8.0
          Name == "subborrow.u32" ||       // Added in 8.0
          Name == "subborrow.u64" ||       // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
485 
486 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
487                                         Function *&NewFn) {
488   // Only handle intrinsics that start with "x86.".
489   if (!Name.consume_front("x86."))
490     return false;
491 
492   if (ShouldUpgradeX86Intrinsic(F, Name)) {
493     NewFn = nullptr;
494     return true;
495   }
496 
497   if (Name == "rdtscp") { // Added in 8.0
498     // If this intrinsic has 0 operands, it's the new version.
499     if (F->getFunctionType()->getNumParams() == 0)
500       return false;
501 
502     rename(F);
503     NewFn = Intrinsic::getDeclaration(F->getParent(),
504                                       Intrinsic::x86_rdtscp);
505     return true;
506   }
507 
508   Intrinsic::ID ID;
509 
510   // SSE4.1 ptest functions may have an old signature.
511   if (Name.consume_front("sse41.ptest")) { // Added in 3.2
512     ID = StringSwitch<Intrinsic::ID>(Name)
513              .Case("c", Intrinsic::x86_sse41_ptestc)
514              .Case("z", Intrinsic::x86_sse41_ptestz)
515              .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
516              .Default(Intrinsic::not_intrinsic);
517     if (ID != Intrinsic::not_intrinsic)
518       return UpgradePTESTIntrinsic(F, ID, NewFn);
519 
520     return false;
521   }
522 
523   // Several blend and other instructions with masks used the wrong number of
524   // bits.
525 
526   // Added in 3.6
527   ID = StringSwitch<Intrinsic::ID>(Name)
528            .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
529            .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
530            .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
531            .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
532            .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
533            .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
534            .Default(Intrinsic::not_intrinsic);
535   if (ID != Intrinsic::not_intrinsic)
536     return UpgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
537 
538   if (Name.consume_front("avx512.mask.cmp.")) {
539     // Added in 7.0
540     ID = StringSwitch<Intrinsic::ID>(Name)
541              .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
542              .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
543              .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
544              .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
545              .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
546              .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
547              .Default(Intrinsic::not_intrinsic);
548     if (ID != Intrinsic::not_intrinsic)
549       return UpgradeX86MaskedFPCompare(F, ID, NewFn);
550     return false; // No other 'x86.avx523.mask.cmp.*'.
551   }
552 
553   if (Name.consume_front("avx512bf16.")) {
554     // Added in 9.0
555     ID = StringSwitch<Intrinsic::ID>(Name)
556              .Case("cvtne2ps2bf16.128",
557                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
558              .Case("cvtne2ps2bf16.256",
559                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
560              .Case("cvtne2ps2bf16.512",
561                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
562              .Case("mask.cvtneps2bf16.128",
563                    Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
564              .Case("cvtneps2bf16.256",
565                    Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
566              .Case("cvtneps2bf16.512",
567                    Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
568              .Default(Intrinsic::not_intrinsic);
569     if (ID != Intrinsic::not_intrinsic)
570       return UpgradeX86BF16Intrinsic(F, ID, NewFn);
571 
572     // Added in 9.0
573     ID = StringSwitch<Intrinsic::ID>(Name)
574              .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
575              .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
576              .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
577              .Default(Intrinsic::not_intrinsic);
578     if (ID != Intrinsic::not_intrinsic)
579       return UpgradeX86BF16DPIntrinsic(F, ID, NewFn);
580     return false; // No other 'x86.avx512bf16.*'.
581   }
582 
583   if (Name.consume_front("xop.")) {
584     Intrinsic::ID ID = Intrinsic::not_intrinsic;
585     if (Name.starts_with("vpermil2")) { // Added in 3.9
586       // Upgrade any XOP PERMIL2 index operand still using a float/double
587       // vector.
588       auto Idx = F->getFunctionType()->getParamType(2);
589       if (Idx->isFPOrFPVectorTy()) {
590         unsigned IdxSize = Idx->getPrimitiveSizeInBits();
591         unsigned EltSize = Idx->getScalarSizeInBits();
592         if (EltSize == 64 && IdxSize == 128)
593           ID = Intrinsic::x86_xop_vpermil2pd;
594         else if (EltSize == 32 && IdxSize == 128)
595           ID = Intrinsic::x86_xop_vpermil2ps;
596         else if (EltSize == 64 && IdxSize == 256)
597           ID = Intrinsic::x86_xop_vpermil2pd_256;
598         else
599           ID = Intrinsic::x86_xop_vpermil2ps_256;
600       }
601     } else if (F->arg_size() == 2)
602       // frcz.ss/sd may need to have an argument dropped. Added in 3.2
603       ID = StringSwitch<Intrinsic::ID>(Name)
604                .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
605                .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
606                .Default(Intrinsic::not_intrinsic);
607 
608     if (ID != Intrinsic::not_intrinsic) {
609       rename(F);
610       NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
611       return true;
612     }
613     return false; // No other 'x86.xop.*'
614   }
615 
616   if (Name == "seh.recoverfp") {
617     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
618     return true;
619   }
620 
621   return false;
622 }
623 
624 // Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
625 // IsArm: 'arm.*', !IsArm: 'aarch64.*'.
626 static bool UpgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
627                                                  StringRef Name,
628                                                  Function *&NewFn) {
629   if (Name.starts_with("rbit")) {
630     // '(arm|aarch64).rbit'.
631     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
632                                       F->arg_begin()->getType());
633     return true;
634   }
635 
636   if (Name == "thread.pointer") {
637     // '(arm|aarch64).thread.pointer'.
638     NewFn =
639         Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
640     return true;
641   }
642 
643   bool Neon = Name.consume_front("neon.");
644   if (Neon) {
645     // '(arm|aarch64).neon.*'.
646     // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
647     // v16i8 respectively.
648     if (Name.consume_front("bfdot.")) {
649       // (arm|aarch64).neon.bfdot.*'.
650       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
651                              .Cases("v2f32.v8i8", "v4f32.v16i8",
652                                     IsArm ? Intrinsic::arm_neon_bfdot
653                                           : Intrinsic::aarch64_neon_bfdot)
654                              .Default(Intrinsic::not_intrinsic);
655       if (ID != Intrinsic::not_intrinsic) {
656         size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
657         assert((OperandWidth == 64 || OperandWidth == 128) &&
658                "Unexpected operand width");
659         LLVMContext &Ctx = F->getParent()->getContext();
660         std::array<Type *, 2> Tys{
661             {F->getReturnType(),
662              FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
663         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
664         return true;
665       }
666       return false; // No other '(arm|aarch64).neon.bfdot.*'.
667     }
668 
669     // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
670     // anymore and accept v8bf16 instead of v16i8.
671     if (Name.consume_front("bfm")) {
672       // (arm|aarch64).neon.bfm*'.
673       if (Name.consume_back(".v4f32.v16i8")) {
674         // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
675         Intrinsic::ID ID =
676             StringSwitch<Intrinsic::ID>(Name)
677                 .Case("mla", IsArm ? Intrinsic::arm_neon_bfmmla
678                                    : Intrinsic::aarch64_neon_bfmmla)
679                 .Case("lalb", IsArm ? Intrinsic::arm_neon_bfmlalb
680                                     : Intrinsic::aarch64_neon_bfmlalb)
681                 .Case("lalt", IsArm ? Intrinsic::arm_neon_bfmlalt
682                                     : Intrinsic::aarch64_neon_bfmlalt)
683                 .Default(Intrinsic::not_intrinsic);
684         if (ID != Intrinsic::not_intrinsic) {
685           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
686           return true;
687         }
688         return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
689       }
690       return false; // No other '(arm|aarch64).neon.bfm*.
691     }
692     // Continue on to Aarch64 Neon or Arm Neon.
693   }
694   // Continue on to Arm or Aarch64.
695 
696   if (IsArm) {
697     // 'arm.*'.
698     if (Neon) {
699       // 'arm.neon.*'.
700       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
701                              .StartsWith("vclz.", Intrinsic::ctlz)
702                              .StartsWith("vcnt.", Intrinsic::ctpop)
703                              .StartsWith("vqadds.", Intrinsic::sadd_sat)
704                              .StartsWith("vqaddu.", Intrinsic::uadd_sat)
705                              .StartsWith("vqsubs.", Intrinsic::ssub_sat)
706                              .StartsWith("vqsubu.", Intrinsic::usub_sat)
707                              .Default(Intrinsic::not_intrinsic);
708       if (ID != Intrinsic::not_intrinsic) {
709         NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
710                                           F->arg_begin()->getType());
711         return true;
712       }
713 
714       if (Name.consume_front("vst")) {
715         // 'arm.neon.vst*'.
716         static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
717         SmallVector<StringRef, 2> Groups;
718         if (vstRegex.match(Name, &Groups)) {
719           static const Intrinsic::ID StoreInts[] = {
720               Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
721               Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
722 
723           static const Intrinsic::ID StoreLaneInts[] = {
724               Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
725               Intrinsic::arm_neon_vst4lane};
726 
727           auto fArgs = F->getFunctionType()->params();
728           Type *Tys[] = {fArgs[0], fArgs[1]};
729           if (Groups[1].size() == 1)
730             NewFn = Intrinsic::getDeclaration(F->getParent(),
731                                               StoreInts[fArgs.size() - 3], Tys);
732           else
733             NewFn = Intrinsic::getDeclaration(
734                 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
735           return true;
736         }
737         return false; // No other 'arm.neon.vst*'.
738       }
739 
740       return false; // No other 'arm.neon.*'.
741     }
742 
743     if (Name.consume_front("mve.")) {
744       // 'arm.mve.*'.
745       if (Name == "vctp64") {
746         if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
747           // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
748           // the function and deal with it below in UpgradeIntrinsicCall.
749           rename(F);
750           return true;
751         }
752         return false; // Not 'arm.mve.vctp64'.
753       }
754 
755       // These too are changed to accept a v2i1 instead of the old v4i1.
756       if (Name.consume_back(".v4i1")) {
757         // 'arm.mve.*.v4i1'.
758         if (Name.consume_back(".predicated.v2i64.v4i32"))
759           // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
760           return Name == "mull.int" || Name == "vqdmull";
761 
762         if (Name.consume_back(".v2i64")) {
763           // 'arm.mve.*.v2i64.v4i1'
764           bool IsGather = Name.consume_front("vldr.gather.");
765           if (IsGather || Name.consume_front("vstr.scatter.")) {
766             if (Name.consume_front("base.")) {
767               // Optional 'wb.' prefix.
768               Name.consume_front("wb.");
769               // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
770               // predicated.v2i64.v2i64.v4i1'.
771               return Name == "predicated.v2i64";
772             }
773 
774             if (Name.consume_front("offset.predicated."))
775               return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
776                      Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
777 
778             // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
779             return false;
780           }
781 
782           return false; // No other 'arm.mve.*.v2i64.v4i1'.
783         }
784         return false; // No other 'arm.mve.*.v4i1'.
785       }
786       return false; // No other 'arm.mve.*'.
787     }
788 
789     if (Name.consume_front("cde.vcx")) {
790       // 'arm.cde.vcx*'.
791       if (Name.consume_back(".predicated.v2i64.v4i1"))
792         // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
793         return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
794                Name == "3q" || Name == "3qa";
795 
796       return false; // No other 'arm.cde.vcx*'.
797     }
798   } else {
799     // 'aarch64.*'.
800     if (Neon) {
801       // 'aarch64.neon.*'.
802       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
803                              .StartsWith("frintn", Intrinsic::roundeven)
804                              .StartsWith("rbit", Intrinsic::bitreverse)
805                              .Default(Intrinsic::not_intrinsic);
806       if (ID != Intrinsic::not_intrinsic) {
807         NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
808                                           F->arg_begin()->getType());
809         return true;
810       }
811 
812       if (Name.starts_with("addp")) {
813         // 'aarch64.neon.addp*'.
814         if (F->arg_size() != 2)
815           return false; // Invalid IR.
816         VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
817         if (Ty && Ty->getElementType()->isFloatingPointTy()) {
818           NewFn = Intrinsic::getDeclaration(F->getParent(),
819                                             Intrinsic::aarch64_neon_faddp, Ty);
820           return true;
821         }
822       }
823       return false; // No other 'aarch64.neon.*'.
824     }
825     if (Name.consume_front("sve.")) {
826       // 'aarch64.sve.*'.
827       if (Name.consume_front("bf")) {
828         if (Name.consume_back(".lane")) {
829           // 'aarch64.sve.bf*.lane'.
830           Intrinsic::ID ID =
831               StringSwitch<Intrinsic::ID>(Name)
832                   .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
833                   .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
834                   .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
835                   .Default(Intrinsic::not_intrinsic);
836           if (ID != Intrinsic::not_intrinsic) {
837             NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
838             return true;
839           }
840           return false; // No other 'aarch64.sve.bf*.lane'.
841         }
842         return false; // No other 'aarch64.sve.bf*'.
843       }
844 
845       if (Name.consume_front("ld")) {
846         // 'aarch64.sve.ld*'.
847         static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
848         if (LdRegex.match(Name)) {
849           Type *ScalarTy =
850               dyn_cast<VectorType>(F->getReturnType())->getElementType();
851           ElementCount EC = dyn_cast<VectorType>(F->arg_begin()->getType())
852                                 ->getElementCount();
853           Type *Ty = VectorType::get(ScalarTy, EC);
854           static const Intrinsic::ID LoadIDs[] = {
855               Intrinsic::aarch64_sve_ld2_sret,
856               Intrinsic::aarch64_sve_ld3_sret,
857               Intrinsic::aarch64_sve_ld4_sret,
858           };
859           NewFn = Intrinsic::getDeclaration(F->getParent(),
860                                             LoadIDs[Name[0] - '2'], Ty);
861           return true;
862         }
863         return false; // No other 'aarch64.sve.ld*'.
864       }
865 
866       if (Name.consume_front("tuple.")) {
867         // 'aarch64.sve.tuple.*'.
868         if (Name.starts_with("get")) {
869           // 'aarch64.sve.tuple.get*'.
870           Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
871           NewFn = Intrinsic::getDeclaration(F->getParent(),
872                                             Intrinsic::vector_extract, Tys);
873           return true;
874         }
875 
876         if (Name.starts_with("set")) {
877           // 'aarch64.sve.tuple.set*'.
878           auto Args = F->getFunctionType()->params();
879           Type *Tys[] = {Args[0], Args[2], Args[1]};
880           NewFn = Intrinsic::getDeclaration(F->getParent(),
881                                             Intrinsic::vector_insert, Tys);
882           return true;
883         }
884 
885         static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
886         if (CreateTupleRegex.match(Name)) {
887           // 'aarch64.sve.tuple.create*'.
888           auto Args = F->getFunctionType()->params();
889           Type *Tys[] = {F->getReturnType(), Args[1]};
890           NewFn = Intrinsic::getDeclaration(F->getParent(),
891                                             Intrinsic::vector_insert, Tys);
892           return true;
893         }
894         return false; // No other 'aarch64.sve.tuple.*'.
895       }
896       return false; // No other 'aarch64.sve.*'.
897     }
898   }
899   return false; // No other 'arm.*', 'aarch64.*'.
900 }
901 
902 static Intrinsic::ID ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
903   if (Name.consume_front("abs."))
904     return StringSwitch<Intrinsic::ID>(Name)
905         .Case("bf16", Intrinsic::nvvm_abs_bf16)
906         .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
907         .Default(Intrinsic::not_intrinsic);
908 
909   if (Name.consume_front("fma.rn."))
910     return StringSwitch<Intrinsic::ID>(Name)
911         .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
912         .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
913         .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
914         .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
915         .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
916         .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
917         .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
918         .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
919         .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
920         .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
921         .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
922         .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
923         .Default(Intrinsic::not_intrinsic);
924 
925   if (Name.consume_front("fmax."))
926     return StringSwitch<Intrinsic::ID>(Name)
927         .Case("bf16", Intrinsic::nvvm_fmax_bf16)
928         .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
929         .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
930         .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
931         .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
932         .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
933         .Case("ftz.nan.xorsign.abs.bf16",
934               Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
935         .Case("ftz.nan.xorsign.abs.bf16x2",
936               Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
937         .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
938         .Case("ftz.xorsign.abs.bf16x2",
939               Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
940         .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
941         .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
942         .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
943         .Case("nan.xorsign.abs.bf16x2",
944               Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
945         .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
946         .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
947         .Default(Intrinsic::not_intrinsic);
948 
949   if (Name.consume_front("fmin."))
950     return StringSwitch<Intrinsic::ID>(Name)
951         .Case("bf16", Intrinsic::nvvm_fmin_bf16)
952         .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
953         .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
954         .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
955         .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
956         .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
957         .Case("ftz.nan.xorsign.abs.bf16",
958               Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
959         .Case("ftz.nan.xorsign.abs.bf16x2",
960               Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
961         .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
962         .Case("ftz.xorsign.abs.bf16x2",
963               Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
964         .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
965         .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
966         .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
967         .Case("nan.xorsign.abs.bf16x2",
968               Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
969         .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
970         .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
971         .Default(Intrinsic::not_intrinsic);
972 
973   if (Name.consume_front("neg."))
974     return StringSwitch<Intrinsic::ID>(Name)
975         .Case("bf16", Intrinsic::nvvm_neg_bf16)
976         .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
977         .Default(Intrinsic::not_intrinsic);
978 
979   return Intrinsic::not_intrinsic;
980 }
981 
982 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
983   assert(F && "Illegal to upgrade a non-existent Function.");
984 
985   StringRef Name = F->getName();
986 
987   // Quickly eliminate it, if it's not a candidate.
988   if (!Name.consume_front("llvm.") || Name.empty())
989     return false;
990 
991   switch (Name[0]) {
992   default: break;
993   case 'a': {
994     bool IsArm = Name.consume_front("arm.");
995     if (IsArm || Name.consume_front("aarch64.")) {
996       if (UpgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
997         return true;
998       break;
999     }
1000 
1001     if (Name.consume_front("amdgcn.")) {
1002       if (Name == "alignbit") {
1003         // Target specific intrinsic became redundant
1004         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
1005                                           {F->getReturnType()});
1006         return true;
1007       }
1008 
1009       if (Name.consume_front("atomic.")) {
1010         if (Name.starts_with("inc") || Name.starts_with("dec")) {
1011           // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1012           // there's no new declaration.
1013           NewFn = nullptr;
1014           return true;
1015         }
1016         break; // No other 'amdgcn.atomic.*'
1017       }
1018 
1019       if (Name.starts_with("ldexp.")) {
1020         // Target specific intrinsic became redundant
1021         NewFn = Intrinsic::getDeclaration(
1022           F->getParent(), Intrinsic::ldexp,
1023           {F->getReturnType(), F->getArg(1)->getType()});
1024         return true;
1025       }
1026       break; // No other 'amdgcn.*'
1027     }
1028 
1029     break;
1030   }
1031   case 'c': {
1032     if (F->arg_size() == 1) {
1033       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1034                              .StartsWith("ctlz.", Intrinsic::ctlz)
1035                              .StartsWith("cttz.", Intrinsic::cttz)
1036                              .Default(Intrinsic::not_intrinsic);
1037       if (ID != Intrinsic::not_intrinsic) {
1038         rename(F);
1039         NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
1040                                           F->arg_begin()->getType());
1041         return true;
1042       }
1043     }
1044 
1045     if (F->arg_size() == 2 && Name.equals("coro.end")) {
1046       rename(F);
1047       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
1048       return true;
1049     }
1050 
1051     break;
1052   }
1053   case 'd':
1054     if (Name.consume_front("dbg.")) {
1055       if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1056         rename(F);
1057         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
1058         return true;
1059       }
1060       break; // No other 'dbg.*'.
1061     }
1062     break;
1063   case 'e':
1064     if (Name.consume_front("experimental.vector.")) {
1065       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1066                              .StartsWith("extract.", Intrinsic::vector_extract)
1067                              .StartsWith("insert.", Intrinsic::vector_insert)
1068                              .Default(Intrinsic::not_intrinsic);
1069       if (ID != Intrinsic::not_intrinsic) {
1070         const auto *FT = F->getFunctionType();
1071         SmallVector<Type *, 2> Tys;
1072         if (ID == Intrinsic::vector_extract)
1073           // Extracting overloads the return type.
1074           Tys.push_back(FT->getReturnType());
1075         Tys.push_back(FT->getParamType(0));
1076         if (ID == Intrinsic::vector_insert)
1077           // Inserting overloads the inserted type.
1078           Tys.push_back(FT->getParamType(1));
1079         rename(F);
1080         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1081         return true;
1082       }
1083 
1084       if (Name.consume_front("reduce.")) {
1085         SmallVector<StringRef, 2> Groups;
1086         static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1087         if (R.match(Name, &Groups))
1088           ID = StringSwitch<Intrinsic::ID>(Groups[1])
1089                    .Case("add", Intrinsic::vector_reduce_add)
1090                    .Case("mul", Intrinsic::vector_reduce_mul)
1091                    .Case("and", Intrinsic::vector_reduce_and)
1092                    .Case("or", Intrinsic::vector_reduce_or)
1093                    .Case("xor", Intrinsic::vector_reduce_xor)
1094                    .Case("smax", Intrinsic::vector_reduce_smax)
1095                    .Case("smin", Intrinsic::vector_reduce_smin)
1096                    .Case("umax", Intrinsic::vector_reduce_umax)
1097                    .Case("umin", Intrinsic::vector_reduce_umin)
1098                    .Case("fmax", Intrinsic::vector_reduce_fmax)
1099                    .Case("fmin", Intrinsic::vector_reduce_fmin)
1100                    .Default(Intrinsic::not_intrinsic);
1101 
1102         bool V2 = false;
1103         if (ID == Intrinsic::not_intrinsic) {
1104           static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1105           Groups.clear();
1106           V2 = true;
1107           if (R2.match(Name, &Groups))
1108             ID = StringSwitch<Intrinsic::ID>(Groups[1])
1109                      .Case("fadd", Intrinsic::vector_reduce_fadd)
1110                      .Case("fmul", Intrinsic::vector_reduce_fmul)
1111                      .Default(Intrinsic::not_intrinsic);
1112         }
1113         if (ID != Intrinsic::not_intrinsic) {
1114           rename(F);
1115           auto Args = F->getFunctionType()->params();
1116           NewFn =
1117               Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
1118           return true;
1119         }
1120         break; // No other 'expermental.vector.reduce.*'.
1121       }
1122       break; // No other 'experimental.vector.*'.
1123     }
1124     break; // No other 'e*'.
1125   case 'f':
1126     if (Name.starts_with("flt.rounds")) {
1127       rename(F);
1128       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1129       return true;
1130     }
1131     break;
1132   case 'i':
1133     if (Name.starts_with("invariant.group.barrier")) {
1134       // Rename invariant.group.barrier to launder.invariant.group
1135       auto Args = F->getFunctionType()->params();
1136       Type* ObjectPtr[1] = {Args[0]};
1137       rename(F);
1138       NewFn = Intrinsic::getDeclaration(F->getParent(),
1139           Intrinsic::launder_invariant_group, ObjectPtr);
1140       return true;
1141     }
1142     break;
1143   case 'm': {
1144     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1145     // alignment parameter to embedding the alignment as an attribute of
1146     // the pointer args.
1147     if (unsigned ID = StringSwitch<unsigned>(Name)
1148                           .StartsWith("memcpy.", Intrinsic::memcpy)
1149                           .StartsWith("memmove.", Intrinsic::memmove)
1150                           .Default(0)) {
1151       if (F->arg_size() == 5) {
1152         rename(F);
1153         // Get the types of dest, src, and len
1154         ArrayRef<Type *> ParamTypes =
1155             F->getFunctionType()->params().slice(0, 3);
1156         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
1157         return true;
1158       }
1159     }
1160     if (Name.starts_with("memset.") && F->arg_size() == 5) {
1161       rename(F);
1162       // Get the types of dest, and len
1163       const auto *FT = F->getFunctionType();
1164       Type *ParamTypes[2] = {
1165           FT->getParamType(0), // Dest
1166           FT->getParamType(2)  // len
1167       };
1168       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1169                                         ParamTypes);
1170       return true;
1171     }
1172     break;
1173   }
1174   case 'n': {
1175     if (Name.consume_front("nvvm.")) {
1176       // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1177       if (F->arg_size() == 1) {
1178         Intrinsic::ID IID =
1179             StringSwitch<Intrinsic::ID>(Name)
1180                 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1181                 .Case("clz.i", Intrinsic::ctlz)
1182                 .Case("popc.i", Intrinsic::ctpop)
1183                 .Default(Intrinsic::not_intrinsic);
1184         if (IID != Intrinsic::not_intrinsic) {
1185           NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1186                                             {F->getReturnType()});
1187           return true;
1188         }
1189       }
1190 
1191       // Check for nvvm intrinsics that need a return type adjustment.
1192       if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1193         Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
1194         if (IID != Intrinsic::not_intrinsic) {
1195           NewFn = nullptr;
1196           return true;
1197         }
1198       }
1199 
1200       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1201       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
1202       //
1203       // TODO: We could add lohi.i2d.
1204       bool Expand = false;
1205       if (Name.consume_front("abs."))
1206         // nvvm.abs.{i,ii}
1207         Expand = Name == "i" || Name == "ll";
1208       else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1209         Expand = true;
1210       else if (Name.consume_front("max.") || Name.consume_front("min."))
1211         // nvvm.{min,max}.{i,ii,ui,ull}
1212         Expand = Name == "i" || Name == "ll" || Name == "ui" || Name == "ull";
1213       else if (Name.consume_front("atomic.load.add."))
1214         // nvvm.atomic.load.add.{f32.p,f64.p}
1215         Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1216       else
1217         Expand = false;
1218 
1219       if (Expand) {
1220         NewFn = nullptr;
1221         return true;
1222       }
1223       break; // No other 'nvvm.*'.
1224     }
1225     break;
1226   }
1227   case 'o':
1228     // We only need to change the name to match the mangling including the
1229     // address space.
1230     if (Name.starts_with("objectsize.")) {
1231       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1232       if (F->arg_size() == 2 || F->arg_size() == 3 ||
1233           F->getName() !=
1234               Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1235         rename(F);
1236         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1237                                           Tys);
1238         return true;
1239       }
1240     }
1241     break;
1242 
1243   case 'p':
1244     if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1245       rename(F);
1246       NewFn = Intrinsic::getDeclaration(
1247           F->getParent(), Intrinsic::ptr_annotation,
1248           {F->arg_begin()->getType(), F->getArg(1)->getType()});
1249       return true;
1250     }
1251     break;
1252 
1253   case 'r': {
1254     if (Name.consume_front("riscv.")) {
1255       Intrinsic::ID ID;
1256       ID = StringSwitch<Intrinsic::ID>(Name)
1257                .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1258                .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1259                .Case("aes32esi", Intrinsic::riscv_aes32esi)
1260                .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1261                .Default(Intrinsic::not_intrinsic);
1262       if (ID != Intrinsic::not_intrinsic) {
1263         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1264           rename(F);
1265           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1266           return true;
1267         }
1268         break; // No other applicable upgrades.
1269       }
1270 
1271       ID = StringSwitch<Intrinsic::ID>(Name)
1272                .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1273                .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1274                .Default(Intrinsic::not_intrinsic);
1275       if (ID != Intrinsic::not_intrinsic) {
1276         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1277             F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1278           rename(F);
1279           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1280           return true;
1281         }
1282         break; // No other applicable upgrades.
1283       }
1284 
1285       ID = StringSwitch<Intrinsic::ID>(Name)
1286                .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1287                .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1288                .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1289                .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1290                .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1291                .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1292                .Default(Intrinsic::not_intrinsic);
1293       if (ID != Intrinsic::not_intrinsic) {
1294         if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1295           rename(F);
1296           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1297           return true;
1298         }
1299         break; // No other applicable upgrades.
1300       }
1301       break; // No other 'riscv.*' intrinsics
1302     }
1303   } break;
1304 
1305   case 's':
1306     if (Name == "stackprotectorcheck") {
1307       NewFn = nullptr;
1308       return true;
1309     }
1310     break;
1311 
1312   case 'v': {
1313     if (Name == "var.annotation" && F->arg_size() == 4) {
1314       rename(F);
1315       NewFn = Intrinsic::getDeclaration(
1316           F->getParent(), Intrinsic::var_annotation,
1317           {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1318       return true;
1319     }
1320     break;
1321   }
1322 
1323   case 'w':
1324     if (Name.consume_front("wasm.")) {
1325       Intrinsic::ID ID =
1326           StringSwitch<Intrinsic::ID>(Name)
1327               .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1328               .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1329               .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1330               .Default(Intrinsic::not_intrinsic);
1331       if (ID != Intrinsic::not_intrinsic) {
1332         rename(F);
1333         NewFn =
1334             Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1335         return true;
1336       }
1337 
1338       if (Name.consume_front("dot.i8x16.i7x16.")) {
1339         ID = StringSwitch<Intrinsic::ID>(Name)
1340                  .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1341                  .Case("add.signed",
1342                        Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1343                  .Default(Intrinsic::not_intrinsic);
1344         if (ID != Intrinsic::not_intrinsic) {
1345           rename(F);
1346           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1347           return true;
1348         }
1349         break; // No other 'wasm.dot.i8x16.i7x16.*'.
1350       }
1351       break; // No other 'wasm.*'.
1352     }
1353     break;
1354 
1355   case 'x':
1356     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1357       return true;
1358   }
1359 
1360   auto *ST = dyn_cast<StructType>(F->getReturnType());
1361   if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1362       F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1363     // Replace return type with literal non-packed struct. Only do this for
1364     // intrinsics declared to return a struct, not for intrinsics with
1365     // overloaded return type, in which case the exact struct type will be
1366     // mangled into the name.
1367     SmallVector<Intrinsic::IITDescriptor> Desc;
1368     Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1369     if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1370       auto *FT = F->getFunctionType();
1371       auto *NewST = StructType::get(ST->getContext(), ST->elements());
1372       auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1373       std::string Name = F->getName().str();
1374       rename(F);
1375       NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1376                                Name, F->getParent());
1377 
1378       // The new function may also need remangling.
1379       if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1380         NewFn = *Result;
1381       return true;
1382     }
1383   }
1384 
1385   // Remangle our intrinsic since we upgrade the mangling
1386   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1387   if (Result != std::nullopt) {
1388     NewFn = *Result;
1389     return true;
1390   }
1391 
1392   //  This may not belong here. This function is effectively being overloaded
1393   //  to both detect an intrinsic which needs upgrading, and to provide the
1394   //  upgraded form of the intrinsic. We should perhaps have two separate
1395   //  functions for this.
1396   return false;
1397 }
1398 
1399 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1400   NewFn = nullptr;
1401   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1402   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1403 
1404   // Upgrade intrinsic attributes.  This does not change the function.
1405   if (NewFn)
1406     F = NewFn;
1407   if (Intrinsic::ID id = F->getIntrinsicID())
1408     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1409   return Upgraded;
1410 }
1411 
1412 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1413   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1414                           GV->getName() == "llvm.global_dtors")) ||
1415       !GV->hasInitializer())
1416     return nullptr;
1417   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1418   if (!ATy)
1419     return nullptr;
1420   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1421   if (!STy || STy->getNumElements() != 2)
1422     return nullptr;
1423 
1424   LLVMContext &C = GV->getContext();
1425   IRBuilder<> IRB(C);
1426   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1427                                IRB.getPtrTy());
1428   Constant *Init = GV->getInitializer();
1429   unsigned N = Init->getNumOperands();
1430   std::vector<Constant *> NewCtors(N);
1431   for (unsigned i = 0; i != N; ++i) {
1432     auto Ctor = cast<Constant>(Init->getOperand(i));
1433     NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1434                                       Ctor->getAggregateElement(1),
1435                                       Constant::getNullValue(IRB.getPtrTy()));
1436   }
1437   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1438 
1439   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1440                             NewInit, GV->getName());
1441 }
1442 
1443 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1444 // to byte shuffles.
1445 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1446                                          Value *Op, unsigned Shift) {
1447   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1448   unsigned NumElts = ResultTy->getNumElements() * 8;
1449 
1450   // Bitcast from a 64-bit element type to a byte element type.
1451   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1452   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1453 
1454   // We'll be shuffling in zeroes.
1455   Value *Res = Constant::getNullValue(VecTy);
1456 
1457   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1458   // we'll just return the zero vector.
1459   if (Shift < 16) {
1460     int Idxs[64];
1461     // 256/512-bit version is split into 2/4 16-byte lanes.
1462     for (unsigned l = 0; l != NumElts; l += 16)
1463       for (unsigned i = 0; i != 16; ++i) {
1464         unsigned Idx = NumElts + i - Shift;
1465         if (Idx < NumElts)
1466           Idx -= NumElts - 16; // end of lane, switch operand.
1467         Idxs[l + i] = Idx + l;
1468       }
1469 
1470     Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1471   }
1472 
1473   // Bitcast back to a 64-bit element type.
1474   return Builder.CreateBitCast(Res, ResultTy, "cast");
1475 }
1476 
1477 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1478 // to byte shuffles.
1479 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1480                                          unsigned Shift) {
1481   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1482   unsigned NumElts = ResultTy->getNumElements() * 8;
1483 
1484   // Bitcast from a 64-bit element type to a byte element type.
1485   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1486   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1487 
1488   // We'll be shuffling in zeroes.
1489   Value *Res = Constant::getNullValue(VecTy);
1490 
1491   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1492   // we'll just return the zero vector.
1493   if (Shift < 16) {
1494     int Idxs[64];
1495     // 256/512-bit version is split into 2/4 16-byte lanes.
1496     for (unsigned l = 0; l != NumElts; l += 16)
1497       for (unsigned i = 0; i != 16; ++i) {
1498         unsigned Idx = i + Shift;
1499         if (Idx >= 16)
1500           Idx += NumElts - 16; // end of lane, switch operand.
1501         Idxs[l + i] = Idx + l;
1502       }
1503 
1504     Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1505   }
1506 
1507   // Bitcast back to a 64-bit element type.
1508   return Builder.CreateBitCast(Res, ResultTy, "cast");
1509 }
1510 
1511 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1512                             unsigned NumElts) {
1513   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1514   llvm::VectorType *MaskTy = FixedVectorType::get(
1515       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1516   Mask = Builder.CreateBitCast(Mask, MaskTy);
1517 
1518   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1519   // i8 and we need to extract down to the right number of elements.
1520   if (NumElts <= 4) {
1521     int Indices[4];
1522     for (unsigned i = 0; i != NumElts; ++i)
1523       Indices[i] = i;
1524     Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1525                                        "extract");
1526   }
1527 
1528   return Mask;
1529 }
1530 
1531 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1532                             Value *Op0, Value *Op1) {
1533   // If the mask is all ones just emit the first operation.
1534   if (const auto *C = dyn_cast<Constant>(Mask))
1535     if (C->isAllOnesValue())
1536       return Op0;
1537 
1538   Mask = getX86MaskVec(Builder, Mask,
1539                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1540   return Builder.CreateSelect(Mask, Op0, Op1);
1541 }
1542 
1543 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1544                                   Value *Op0, Value *Op1) {
1545   // If the mask is all ones just emit the first operation.
1546   if (const auto *C = dyn_cast<Constant>(Mask))
1547     if (C->isAllOnesValue())
1548       return Op0;
1549 
1550   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1551                                       Mask->getType()->getIntegerBitWidth());
1552   Mask = Builder.CreateBitCast(Mask, MaskTy);
1553   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1554   return Builder.CreateSelect(Mask, Op0, Op1);
1555 }
1556 
1557 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1558 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1559 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount is an immediate operand, so it is always a ConstantInt.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  // The shuffle reads the concatenation {Op1, Op0}: indices below NumElts
  // select from Op1 (the low source), indices at or above select from Op0.
  Value *Align = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

  // Apply merge-masking against the passthru vector.
  return EmitX86Select(Builder, Mask, Align, Passthru);
}
1604 
// Upgrade masked vpermt2var/vpermi2var intrinsics to the unmasked
// x86_avx512_vpermi2var variants plus an explicit select for the mask.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  // Pick the vpermi2var variant matching the vector width, element width and
  // element type (FP vs. integer) of the call's result type.
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  // ZeroMask variants blend masked-off lanes with zero; merge-mask variants
  // blend with operand 1, reinterpreted as the result type.
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1665 
1666 static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1667                                          Intrinsic::ID IID) {
1668   Type *Ty = CI.getType();
1669   Value *Op0 = CI.getOperand(0);
1670   Value *Op1 = CI.getOperand(1);
1671   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1672   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1673 
1674   if (CI.arg_size() == 4) { // For masked intrinsics.
1675     Value *VecSrc = CI.getOperand(2);
1676     Value *Mask = CI.getOperand(3);
1677     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1678   }
1679   return Res;
1680 }
1681 
1682 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1683                                bool IsRotateRight) {
1684   Type *Ty = CI.getType();
1685   Value *Src = CI.getArgOperand(0);
1686   Value *Amt = CI.getArgOperand(1);
1687 
1688   // Amount may be scalar immediate, in which case create a splat vector.
1689   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1690   // we only care about the lowest log2 bits anyway.
1691   if (Amt->getType() != Ty) {
1692     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1693     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1694     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1695   }
1696 
1697   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1698   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1699   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1700 
1701   if (CI.arg_size() == 4) { // For masked intrinsics.
1702     Value *VecSrc = CI.getOperand(2);
1703     Value *Mask = CI.getOperand(3);
1704     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1705   }
1706   return Res;
1707 }
1708 
1709 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1710                               bool IsSigned) {
1711   Type *Ty = CI.getType();
1712   Value *LHS = CI.getArgOperand(0);
1713   Value *RHS = CI.getArgOperand(1);
1714 
1715   CmpInst::Predicate Pred;
1716   switch (Imm) {
1717   case 0x0:
1718     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1719     break;
1720   case 0x1:
1721     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1722     break;
1723   case 0x2:
1724     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1725     break;
1726   case 0x3:
1727     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1728     break;
1729   case 0x4:
1730     Pred = ICmpInst::ICMP_EQ;
1731     break;
1732   case 0x5:
1733     Pred = ICmpInst::ICMP_NE;
1734     break;
1735   case 0x6:
1736     return Constant::getNullValue(Ty); // FALSE
1737   case 0x7:
1738     return Constant::getAllOnesValue(Ty); // TRUE
1739   default:
1740     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1741   }
1742 
1743   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1744   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1745   return Ext;
1746 }
1747 
1748 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1749                                     bool IsShiftRight, bool ZeroMask) {
1750   Type *Ty = CI.getType();
1751   Value *Op0 = CI.getArgOperand(0);
1752   Value *Op1 = CI.getArgOperand(1);
1753   Value *Amt = CI.getArgOperand(2);
1754 
1755   if (IsShiftRight)
1756     std::swap(Op0, Op1);
1757 
1758   // Amount may be scalar immediate, in which case create a splat vector.
1759   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1760   // we only care about the lowest log2 bits anyway.
1761   if (Amt->getType() != Ty) {
1762     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1763     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1764     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1765   }
1766 
1767   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1768   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1769   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1770 
1771   unsigned NumArgs = CI.arg_size();
1772   if (NumArgs >= 4) { // For masked intrinsics.
1773     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1774                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1775                                    CI.getArgOperand(0);
1776     Value *Mask = CI.getOperand(NumArgs - 1);
1777     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1778   }
1779   return Res;
1780 }
1781 
1782 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1783                                  Value *Ptr, Value *Data, Value *Mask,
1784                                  bool Aligned) {
1785   // Cast the pointer to the right type.
1786   Ptr = Builder.CreateBitCast(Ptr,
1787                               llvm::PointerType::getUnqual(Data->getType()));
1788   const Align Alignment =
1789       Aligned
1790           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1791           : Align(1);
1792 
1793   // If the mask is all ones just emit a regular store.
1794   if (const auto *C = dyn_cast<Constant>(Mask))
1795     if (C->isAllOnesValue())
1796       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1797 
1798   // Convert the mask from an integer type to a vector of i1.
1799   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1800   Mask = getX86MaskVec(Builder, Mask, NumElts);
1801   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1802 }
1803 
1804 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1805                                 Value *Ptr, Value *Passthru, Value *Mask,
1806                                 bool Aligned) {
1807   Type *ValTy = Passthru->getType();
1808   // Cast the pointer to the right type.
1809   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1810   const Align Alignment =
1811       Aligned
1812           ? Align(
1813                 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1814                 8)
1815           : Align(1);
1816 
1817   // If the mask is all ones just emit a regular store.
1818   if (const auto *C = dyn_cast<Constant>(Mask))
1819     if (C->isAllOnesValue())
1820       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1821 
1822   // Convert the mask from an integer type to a vector of i1.
1823   unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1824   Mask = getX86MaskVec(Builder, Mask, NumElts);
1825   return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1826 }
1827 
1828 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1829   Type *Ty = CI.getType();
1830   Value *Op0 = CI.getArgOperand(0);
1831   Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1832   Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1833   if (CI.arg_size() == 3)
1834     Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1835   return Res;
1836 }
1837 
1838 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1839   Type *Ty = CI.getType();
1840 
1841   // Arguments have a vXi32 type so cast to vXi64.
1842   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1843   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1844 
1845   if (IsSigned) {
1846     // Shift left then arithmetic shift right.
1847     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1848     LHS = Builder.CreateShl(LHS, ShiftAmt);
1849     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1850     RHS = Builder.CreateShl(RHS, ShiftAmt);
1851     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1852   } else {
1853     // Clear the upper bits.
1854     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1855     LHS = Builder.CreateAnd(LHS, Mask);
1856     RHS = Builder.CreateAnd(RHS, Mask);
1857   }
1858 
1859   Value *Res = Builder.CreateMul(LHS, RHS);
1860 
1861   if (CI.arg_size() == 4)
1862     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1863 
1864   return Res;
1865 }
1866 
1867 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1868 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1869                                      Value *Mask) {
1870   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1871   if (Mask) {
1872     const auto *C = dyn_cast<Constant>(Mask);
1873     if (!C || !C->isAllOnesValue())
1874       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1875   }
1876 
1877   if (NumElts < 8) {
1878     int Indices[8];
1879     for (unsigned i = 0; i != NumElts; ++i)
1880       Indices[i] = i;
1881     for (unsigned i = NumElts; i != 8; ++i)
1882       Indices[i] = NumElts + i % NumElts;
1883     Vec = Builder.CreateShuffleVector(Vec,
1884                                       Constant::getNullValue(Vec->getType()),
1885                                       Indices);
1886   }
1887   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1888 }
1889 
1890 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1891                                    unsigned CC, bool Signed) {
1892   Value *Op0 = CI.getArgOperand(0);
1893   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1894 
1895   Value *Cmp;
1896   if (CC == 3) {
1897     Cmp = Constant::getNullValue(
1898         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1899   } else if (CC == 7) {
1900     Cmp = Constant::getAllOnesValue(
1901         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1902   } else {
1903     ICmpInst::Predicate Pred;
1904     switch (CC) {
1905     default: llvm_unreachable("Unknown condition code");
1906     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1907     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1908     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1909     case 4: Pred = ICmpInst::ICMP_NE;  break;
1910     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1911     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1912     }
1913     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1914   }
1915 
1916   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1917 
1918   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1919 }
1920 
1921 // Replace a masked intrinsic with an older unmasked intrinsic.
1922 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1923                                     Intrinsic::ID IID) {
1924   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1925   Value *Rep = Builder.CreateCall(Intrin,
1926                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1927   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1928 }
1929 
1930 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1931   Value* A = CI.getArgOperand(0);
1932   Value* B = CI.getArgOperand(1);
1933   Value* Src = CI.getArgOperand(2);
1934   Value* Mask = CI.getArgOperand(3);
1935 
1936   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1937   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1938   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1939   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1940   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1941   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1942 }
1943 
1944 
1945 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1946   Value* Op = CI.getArgOperand(0);
1947   Type* ReturnOp = CI.getType();
1948   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1949   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1950   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1951 }
1952 
1953 // Replace intrinsic with unmasked version and a select.
1954 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1955                                       CallBase &CI, Value *&Rep) {
1956   Name = Name.substr(12); // Remove avx512.mask.
1957 
1958   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1959   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1960   Intrinsic::ID IID;
1961   if (Name.starts_with("max.p")) {
1962     if (VecWidth == 128 && EltWidth == 32)
1963       IID = Intrinsic::x86_sse_max_ps;
1964     else if (VecWidth == 128 && EltWidth == 64)
1965       IID = Intrinsic::x86_sse2_max_pd;
1966     else if (VecWidth == 256 && EltWidth == 32)
1967       IID = Intrinsic::x86_avx_max_ps_256;
1968     else if (VecWidth == 256 && EltWidth == 64)
1969       IID = Intrinsic::x86_avx_max_pd_256;
1970     else
1971       llvm_unreachable("Unexpected intrinsic");
1972   } else if (Name.starts_with("min.p")) {
1973     if (VecWidth == 128 && EltWidth == 32)
1974       IID = Intrinsic::x86_sse_min_ps;
1975     else if (VecWidth == 128 && EltWidth == 64)
1976       IID = Intrinsic::x86_sse2_min_pd;
1977     else if (VecWidth == 256 && EltWidth == 32)
1978       IID = Intrinsic::x86_avx_min_ps_256;
1979     else if (VecWidth == 256 && EltWidth == 64)
1980       IID = Intrinsic::x86_avx_min_pd_256;
1981     else
1982       llvm_unreachable("Unexpected intrinsic");
1983   } else if (Name.starts_with("pshuf.b.")) {
1984     if (VecWidth == 128)
1985       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1986     else if (VecWidth == 256)
1987       IID = Intrinsic::x86_avx2_pshuf_b;
1988     else if (VecWidth == 512)
1989       IID = Intrinsic::x86_avx512_pshuf_b_512;
1990     else
1991       llvm_unreachable("Unexpected intrinsic");
1992   } else if (Name.starts_with("pmul.hr.sw.")) {
1993     if (VecWidth == 128)
1994       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1995     else if (VecWidth == 256)
1996       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1997     else if (VecWidth == 512)
1998       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1999     else
2000       llvm_unreachable("Unexpected intrinsic");
2001   } else if (Name.starts_with("pmulh.w.")) {
2002     if (VecWidth == 128)
2003       IID = Intrinsic::x86_sse2_pmulh_w;
2004     else if (VecWidth == 256)
2005       IID = Intrinsic::x86_avx2_pmulh_w;
2006     else if (VecWidth == 512)
2007       IID = Intrinsic::x86_avx512_pmulh_w_512;
2008     else
2009       llvm_unreachable("Unexpected intrinsic");
2010   } else if (Name.starts_with("pmulhu.w.")) {
2011     if (VecWidth == 128)
2012       IID = Intrinsic::x86_sse2_pmulhu_w;
2013     else if (VecWidth == 256)
2014       IID = Intrinsic::x86_avx2_pmulhu_w;
2015     else if (VecWidth == 512)
2016       IID = Intrinsic::x86_avx512_pmulhu_w_512;
2017     else
2018       llvm_unreachable("Unexpected intrinsic");
2019   } else if (Name.starts_with("pmaddw.d.")) {
2020     if (VecWidth == 128)
2021       IID = Intrinsic::x86_sse2_pmadd_wd;
2022     else if (VecWidth == 256)
2023       IID = Intrinsic::x86_avx2_pmadd_wd;
2024     else if (VecWidth == 512)
2025       IID = Intrinsic::x86_avx512_pmaddw_d_512;
2026     else
2027       llvm_unreachable("Unexpected intrinsic");
2028   } else if (Name.starts_with("pmaddubs.w.")) {
2029     if (VecWidth == 128)
2030       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2031     else if (VecWidth == 256)
2032       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2033     else if (VecWidth == 512)
2034       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2035     else
2036       llvm_unreachable("Unexpected intrinsic");
2037   } else if (Name.starts_with("packsswb.")) {
2038     if (VecWidth == 128)
2039       IID = Intrinsic::x86_sse2_packsswb_128;
2040     else if (VecWidth == 256)
2041       IID = Intrinsic::x86_avx2_packsswb;
2042     else if (VecWidth == 512)
2043       IID = Intrinsic::x86_avx512_packsswb_512;
2044     else
2045       llvm_unreachable("Unexpected intrinsic");
2046   } else if (Name.starts_with("packssdw.")) {
2047     if (VecWidth == 128)
2048       IID = Intrinsic::x86_sse2_packssdw_128;
2049     else if (VecWidth == 256)
2050       IID = Intrinsic::x86_avx2_packssdw;
2051     else if (VecWidth == 512)
2052       IID = Intrinsic::x86_avx512_packssdw_512;
2053     else
2054       llvm_unreachable("Unexpected intrinsic");
2055   } else if (Name.starts_with("packuswb.")) {
2056     if (VecWidth == 128)
2057       IID = Intrinsic::x86_sse2_packuswb_128;
2058     else if (VecWidth == 256)
2059       IID = Intrinsic::x86_avx2_packuswb;
2060     else if (VecWidth == 512)
2061       IID = Intrinsic::x86_avx512_packuswb_512;
2062     else
2063       llvm_unreachable("Unexpected intrinsic");
2064   } else if (Name.starts_with("packusdw.")) {
2065     if (VecWidth == 128)
2066       IID = Intrinsic::x86_sse41_packusdw;
2067     else if (VecWidth == 256)
2068       IID = Intrinsic::x86_avx2_packusdw;
2069     else if (VecWidth == 512)
2070       IID = Intrinsic::x86_avx512_packusdw_512;
2071     else
2072       llvm_unreachable("Unexpected intrinsic");
2073   } else if (Name.starts_with("vpermilvar.")) {
2074     if (VecWidth == 128 && EltWidth == 32)
2075       IID = Intrinsic::x86_avx_vpermilvar_ps;
2076     else if (VecWidth == 128 && EltWidth == 64)
2077       IID = Intrinsic::x86_avx_vpermilvar_pd;
2078     else if (VecWidth == 256 && EltWidth == 32)
2079       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2080     else if (VecWidth == 256 && EltWidth == 64)
2081       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2082     else if (VecWidth == 512 && EltWidth == 32)
2083       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2084     else if (VecWidth == 512 && EltWidth == 64)
2085       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2086     else
2087       llvm_unreachable("Unexpected intrinsic");
2088   } else if (Name == "cvtpd2dq.256") {
2089     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2090   } else if (Name == "cvtpd2ps.256") {
2091     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2092   } else if (Name == "cvttpd2dq.256") {
2093     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2094   } else if (Name == "cvttps2dq.128") {
2095     IID = Intrinsic::x86_sse2_cvttps2dq;
2096   } else if (Name == "cvttps2dq.256") {
2097     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2098   } else if (Name.starts_with("permvar.")) {
2099     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2100     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2101       IID = Intrinsic::x86_avx2_permps;
2102     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2103       IID = Intrinsic::x86_avx2_permd;
2104     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2105       IID = Intrinsic::x86_avx512_permvar_df_256;
2106     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2107       IID = Intrinsic::x86_avx512_permvar_di_256;
2108     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2109       IID = Intrinsic::x86_avx512_permvar_sf_512;
2110     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2111       IID = Intrinsic::x86_avx512_permvar_si_512;
2112     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2113       IID = Intrinsic::x86_avx512_permvar_df_512;
2114     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2115       IID = Intrinsic::x86_avx512_permvar_di_512;
2116     else if (VecWidth == 128 && EltWidth == 16)
2117       IID = Intrinsic::x86_avx512_permvar_hi_128;
2118     else if (VecWidth == 256 && EltWidth == 16)
2119       IID = Intrinsic::x86_avx512_permvar_hi_256;
2120     else if (VecWidth == 512 && EltWidth == 16)
2121       IID = Intrinsic::x86_avx512_permvar_hi_512;
2122     else if (VecWidth == 128 && EltWidth == 8)
2123       IID = Intrinsic::x86_avx512_permvar_qi_128;
2124     else if (VecWidth == 256 && EltWidth == 8)
2125       IID = Intrinsic::x86_avx512_permvar_qi_256;
2126     else if (VecWidth == 512 && EltWidth == 8)
2127       IID = Intrinsic::x86_avx512_permvar_qi_512;
2128     else
2129       llvm_unreachable("Unexpected intrinsic");
2130   } else if (Name.starts_with("dbpsadbw.")) {
2131     if (VecWidth == 128)
2132       IID = Intrinsic::x86_avx512_dbpsadbw_128;
2133     else if (VecWidth == 256)
2134       IID = Intrinsic::x86_avx512_dbpsadbw_256;
2135     else if (VecWidth == 512)
2136       IID = Intrinsic::x86_avx512_dbpsadbw_512;
2137     else
2138       llvm_unreachable("Unexpected intrinsic");
2139   } else if (Name.starts_with("pmultishift.qb.")) {
2140     if (VecWidth == 128)
2141       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2142     else if (VecWidth == 256)
2143       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2144     else if (VecWidth == 512)
2145       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2146     else
2147       llvm_unreachable("Unexpected intrinsic");
2148   } else if (Name.starts_with("conflict.")) {
2149     if (Name[9] == 'd' && VecWidth == 128)
2150       IID = Intrinsic::x86_avx512_conflict_d_128;
2151     else if (Name[9] == 'd' && VecWidth == 256)
2152       IID = Intrinsic::x86_avx512_conflict_d_256;
2153     else if (Name[9] == 'd' && VecWidth == 512)
2154       IID = Intrinsic::x86_avx512_conflict_d_512;
2155     else if (Name[9] == 'q' && VecWidth == 128)
2156       IID = Intrinsic::x86_avx512_conflict_q_128;
2157     else if (Name[9] == 'q' && VecWidth == 256)
2158       IID = Intrinsic::x86_avx512_conflict_q_256;
2159     else if (Name[9] == 'q' && VecWidth == 512)
2160       IID = Intrinsic::x86_avx512_conflict_q_512;
2161     else
2162       llvm_unreachable("Unexpected intrinsic");
2163   } else if (Name.starts_with("pavg.")) {
2164     if (Name[5] == 'b' && VecWidth == 128)
2165       IID = Intrinsic::x86_sse2_pavg_b;
2166     else if (Name[5] == 'b' && VecWidth == 256)
2167       IID = Intrinsic::x86_avx2_pavg_b;
2168     else if (Name[5] == 'b' && VecWidth == 512)
2169       IID = Intrinsic::x86_avx512_pavg_b_512;
2170     else if (Name[5] == 'w' && VecWidth == 128)
2171       IID = Intrinsic::x86_sse2_pavg_w;
2172     else if (Name[5] == 'w' && VecWidth == 256)
2173       IID = Intrinsic::x86_avx2_pavg_w;
2174     else if (Name[5] == 'w' && VecWidth == 512)
2175       IID = Intrinsic::x86_avx512_pavg_w_512;
2176     else
2177       llvm_unreachable("Unexpected intrinsic");
2178   } else
2179     return false;
2180 
2181   SmallVector<Value *, 4> Args(CI.args());
2182   Args.pop_back();
2183   Args.pop_back();
2184   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2185                            Args);
2186   unsigned NumArgs = CI.arg_size();
2187   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2188                       CI.getArgOperand(NumArgs - 2));
2189   return true;
2190 }
2191 
2192 /// Upgrade comment in call to inline asm that represents an objc retain release
2193 /// marker.
2194 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2195   size_t Pos;
2196   if (AsmStr->find("mov\tfp") == 0 &&
2197       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2198       (Pos = AsmStr->find("# marker")) != std::string::npos) {
2199     AsmStr->replace(Pos, 1, ";");
2200   }
2201 }
2202 
/// Upgrade old-style ARM MVE/CDE intrinsic calls. The legacy intrinsics used
/// a v4i1 predicate type with 64-bit-element vectors; the current ones use
/// v2i1. This rebuilds the call with the new overload types, converting any
/// 1-bit-vector operands through the arm.mve.pred.{v2i,i2v} cast intrinsics.
/// Returns the replacement value for the old call.
static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateCall(
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
        CI->getArgOperand(0), CI->getName());
    // Cast the v2i1 result to an integer predicate value...
    Value *C1 = Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 2, false)}),
        VCTP);
    // ...and back out as the v4i1 vector type the old intrinsic produced.
    return Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_i2v,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}),
        C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // The intrinsic ID itself is unchanged; only the overload type list,
    // which encodes the predicate type, is rebuilt with v2i1.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    // Select the overload types for the new declaration, mirroring the
    // operand/result types of the old call but with a v2i1 predicate.
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      // Overloaded on result, first operand, and predicate.
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      // Overloaded on the first operand's type (used twice) and predicate.
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      // Gather-with-offset: result, base, offsets, predicate.
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      // Scatter-with-offset: base, offsets, data, predicate.
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      // CDE intrinsics: overloaded on the inactive-value operand and predicate.
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Forward the old operands, re-typing any i1-vector (predicate) operand:
    // cast the old v4i1 predicate to an integer mask, then back out as v2i1.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateCall(
            Intrinsic::getDeclaration(
                F->getParent(), Intrinsic::arm_mve_pred_v2i,
                {VectorType::get(Builder.getInt1Ty(), 4, false)}),
            Op);
        Op = Builder.CreateCall(
            Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
            C1);
      }
      Ops.push_back(Op);
    }

    // Emit the call to the re-overloaded declaration of the same intrinsic.
    Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
    return Builder.CreateCall(Fn, Ops, CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
2296 
2297 static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
2298                                          Function *F, IRBuilder<> &Builder) {
2299   const bool IsInc = Name.starts_with("atomic.inc.");
2300   if (IsInc || Name.starts_with("atomic.dec.")) {
2301     if (CI->getNumOperands() != 6) // Malformed bitcode.
2302       return nullptr;
2303 
2304     AtomicRMWInst::BinOp RMWOp =
2305         IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2306 
2307     Value *Ptr = CI->getArgOperand(0);
2308     Value *Val = CI->getArgOperand(1);
2309     ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2310     ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2311 
2312     AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2313     if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2314       Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2315     if (Order == AtomicOrdering::NotAtomic ||
2316         Order == AtomicOrdering::Unordered)
2317       Order = AtomicOrdering::SequentiallyConsistent;
2318 
2319     // The scope argument never really worked correctly. Use agent as the most
2320     // conservative option which should still always produce the instruction.
2321     SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
2322     AtomicRMWInst *RMW =
2323         Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2324 
2325     if (!VolatileArg || !VolatileArg->isZero())
2326       RMW->setVolatile(true);
2327     return RMW;
2328   }
2329 
2330   llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2331 }
2332 
2333 /// Upgrade a call to an old intrinsic. All argument and return casting must be
2334 /// provided to seamlessly integrate with existing context.
2335 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2336   // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2337   // checks the callee's function type matches. It's likely we need to handle
2338   // type changes here.
2339   Function *F = dyn_cast<Function>(CI->getCalledOperand());
2340   if (!F)
2341     return;
2342 
2343   LLVMContext &C = CI->getContext();
2344   IRBuilder<> Builder(C);
2345   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2346 
2347   if (!NewFn) {
2348     // Get the Function's name.
2349     StringRef Name = F->getName();
2350 
2351     assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2352     Name = Name.substr(5);
2353 
2354     bool IsX86 = Name.starts_with("x86.");
2355     if (IsX86)
2356       Name = Name.substr(4);
2357     bool IsNVVM = Name.starts_with("nvvm.");
2358     if (IsNVVM)
2359       Name = Name.substr(5);
2360     bool IsARM = Name.starts_with("arm.");
2361     if (IsARM)
2362       Name = Name.substr(4);
2363     bool IsAMDGCN = Name.starts_with("amdgcn.");
2364     if (IsAMDGCN)
2365       Name = Name.substr(7);
2366 
2367     if (IsX86 && Name.starts_with("sse4a.movnt.")) {
2368       SmallVector<Metadata *, 1> Elts;
2369       Elts.push_back(
2370           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2371       MDNode *Node = MDNode::get(C, Elts);
2372 
2373       Value *Arg0 = CI->getArgOperand(0);
2374       Value *Arg1 = CI->getArgOperand(1);
2375 
2376       // Nontemporal (unaligned) store of the 0'th element of the float/double
2377       // vector.
2378       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2379       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2380       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2381       Value *Extract =
2382           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2383 
2384       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2385       SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2386 
2387       // Remove intrinsic.
2388       CI->eraseFromParent();
2389       return;
2390     }
2391 
2392     if (IsX86 && (Name.starts_with("avx.movnt.") ||
2393                   Name.starts_with("avx512.storent."))) {
2394       SmallVector<Metadata *, 1> Elts;
2395       Elts.push_back(
2396           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2397       MDNode *Node = MDNode::get(C, Elts);
2398 
2399       Value *Arg0 = CI->getArgOperand(0);
2400       Value *Arg1 = CI->getArgOperand(1);
2401 
2402       // Convert the type of the pointer to a pointer to the stored type.
2403       Value *BC = Builder.CreateBitCast(Arg0,
2404                                         PointerType::getUnqual(Arg1->getType()),
2405                                         "cast");
2406       StoreInst *SI = Builder.CreateAlignedStore(
2407           Arg1, BC,
2408           Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2409       SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2410 
2411       // Remove intrinsic.
2412       CI->eraseFromParent();
2413       return;
2414     }
2415 
2416     if (IsX86 && Name == "sse2.storel.dq") {
2417       Value *Arg0 = CI->getArgOperand(0);
2418       Value *Arg1 = CI->getArgOperand(1);
2419 
2420       auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2421       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2422       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2423       Value *BC = Builder.CreateBitCast(Arg0,
2424                                         PointerType::getUnqual(Elt->getType()),
2425                                         "cast");
2426       Builder.CreateAlignedStore(Elt, BC, Align(1));
2427 
2428       // Remove intrinsic.
2429       CI->eraseFromParent();
2430       return;
2431     }
2432 
2433     if (IsX86 && (Name.starts_with("sse.storeu.") ||
2434                   Name.starts_with("sse2.storeu.") ||
2435                   Name.starts_with("avx.storeu."))) {
2436       Value *Arg0 = CI->getArgOperand(0);
2437       Value *Arg1 = CI->getArgOperand(1);
2438 
2439       Arg0 = Builder.CreateBitCast(Arg0,
2440                                    PointerType::getUnqual(Arg1->getType()),
2441                                    "cast");
2442       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2443 
2444       // Remove intrinsic.
2445       CI->eraseFromParent();
2446       return;
2447     }
2448 
2449     if (IsX86 && Name == "avx512.mask.store.ss") {
2450       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2451       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2452                          Mask, false);
2453 
2454       // Remove intrinsic.
2455       CI->eraseFromParent();
2456       return;
2457     }
2458 
2459     if (IsX86 && (Name.starts_with("avx512.mask.store"))) {
2460       // "avx512.mask.storeu." or "avx512.mask.store."
2461       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2462       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2463                          CI->getArgOperand(2), Aligned);
2464 
2465       // Remove intrinsic.
2466       CI->eraseFromParent();
2467       return;
2468     }
2469 
2470     Value *Rep;
2471     // Upgrade packed integer vector compare intrinsics to compare instructions.
2472     if (IsX86 && (Name.starts_with("sse2.pcmp") ||
2473                   Name.starts_with("avx2.pcmp"))) {
2474       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2475       bool CmpEq = Name[9] == 'e';
2476       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2477                                CI->getArgOperand(0), CI->getArgOperand(1));
2478       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2479     } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) {
2480       Type *ExtTy = Type::getInt32Ty(C);
2481       if (CI->getOperand(0)->getType()->isIntegerTy(8))
2482         ExtTy = Type::getInt64Ty(C);
2483       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2484                          ExtTy->getPrimitiveSizeInBits();
2485       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2486       Rep = Builder.CreateVectorSplat(NumElts, Rep);
2487     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2488                          Name == "sse2.sqrt.sd")) {
2489       Value *Vec = CI->getArgOperand(0);
2490       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2491       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2492                                                  Intrinsic::sqrt, Elt0->getType());
2493       Elt0 = Builder.CreateCall(Intr, Elt0);
2494       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2495     } else if (IsX86 && (Name.starts_with("avx.sqrt.p") ||
2496                          Name.starts_with("sse2.sqrt.p") ||
2497                          Name.starts_with("sse.sqrt.p"))) {
2498       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2499                                                          Intrinsic::sqrt,
2500                                                          CI->getType()),
2501                                {CI->getArgOperand(0)});
2502     } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) {
2503       if (CI->arg_size() == 4 &&
2504           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2505            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2506         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2507                                             : Intrinsic::x86_avx512_sqrt_pd_512;
2508 
2509         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2510         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2511                                                            IID), Args);
2512       } else {
2513         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2514                                                            Intrinsic::sqrt,
2515                                                            CI->getType()),
2516                                  {CI->getArgOperand(0)});
2517       }
2518       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2519                           CI->getArgOperand(1));
2520     } else if (IsX86 && (Name.starts_with("avx512.ptestm") ||
2521                          Name.starts_with("avx512.ptestnm"))) {
2522       Value *Op0 = CI->getArgOperand(0);
2523       Value *Op1 = CI->getArgOperand(1);
2524       Value *Mask = CI->getArgOperand(2);
2525       Rep = Builder.CreateAnd(Op0, Op1);
2526       llvm::Type *Ty = Op0->getType();
2527       Value *Zero = llvm::Constant::getNullValue(Ty);
2528       ICmpInst::Predicate Pred =
2529         Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2530       Rep = Builder.CreateICmp(Pred, Rep, Zero);
2531       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2532     } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){
2533       unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2534                              ->getNumElements();
2535       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2536       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2537                           CI->getArgOperand(1));
2538     } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) {
2539       unsigned NumElts = CI->getType()->getScalarSizeInBits();
2540       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2541       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2542       int Indices[64];
2543       for (unsigned i = 0; i != NumElts; ++i)
2544         Indices[i] = i;
2545 
2546       // First extract half of each vector. This gives better codegen than
2547       // doing it in a single shuffle.
2548       LHS =
2549           Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2550       RHS =
2551           Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2552       // Concat the vectors.
2553       // NOTE: Operands have to be swapped to match intrinsic definition.
2554       Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2555       Rep = Builder.CreateBitCast(Rep, CI->getType());
2556     } else if (IsX86 && Name == "avx512.kand.w") {
2557       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2558       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2559       Rep = Builder.CreateAnd(LHS, RHS);
2560       Rep = Builder.CreateBitCast(Rep, CI->getType());
2561     } else if (IsX86 && Name == "avx512.kandn.w") {
2562       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2563       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2564       LHS = Builder.CreateNot(LHS);
2565       Rep = Builder.CreateAnd(LHS, RHS);
2566       Rep = Builder.CreateBitCast(Rep, CI->getType());
2567     } else if (IsX86 && Name == "avx512.kor.w") {
2568       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2569       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2570       Rep = Builder.CreateOr(LHS, RHS);
2571       Rep = Builder.CreateBitCast(Rep, CI->getType());
2572     } else if (IsX86 && Name == "avx512.kxor.w") {
2573       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2574       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2575       Rep = Builder.CreateXor(LHS, RHS);
2576       Rep = Builder.CreateBitCast(Rep, CI->getType());
2577     } else if (IsX86 && Name == "avx512.kxnor.w") {
2578       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2579       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2580       LHS = Builder.CreateNot(LHS);
2581       Rep = Builder.CreateXor(LHS, RHS);
2582       Rep = Builder.CreateBitCast(Rep, CI->getType());
2583     } else if (IsX86 && Name == "avx512.knot.w") {
2584       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2585       Rep = Builder.CreateNot(Rep);
2586       Rep = Builder.CreateBitCast(Rep, CI->getType());
2587     } else if (IsX86 &&
2588                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2589       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2590       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2591       Rep = Builder.CreateOr(LHS, RHS);
2592       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2593       Value *C;
2594       if (Name[14] == 'c')
2595         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2596       else
2597         C = ConstantInt::getNullValue(Builder.getInt16Ty());
2598       Rep = Builder.CreateICmpEQ(Rep, C);
2599       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2600     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2601                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2602                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2603                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2604       Type *I32Ty = Type::getInt32Ty(C);
2605       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2606                                                  ConstantInt::get(I32Ty, 0));
2607       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2608                                                  ConstantInt::get(I32Ty, 0));
2609       Value *EltOp;
2610       if (Name.contains(".add."))
2611         EltOp = Builder.CreateFAdd(Elt0, Elt1);
2612       else if (Name.contains(".sub."))
2613         EltOp = Builder.CreateFSub(Elt0, Elt1);
2614       else if (Name.contains(".mul."))
2615         EltOp = Builder.CreateFMul(Elt0, Elt1);
2616       else
2617         EltOp = Builder.CreateFDiv(Elt0, Elt1);
2618       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2619                                         ConstantInt::get(I32Ty, 0));
2620     } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) {
2621       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2622       bool CmpEq = Name[16] == 'e';
2623       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2624     } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2625       Type *OpTy = CI->getArgOperand(0)->getType();
2626       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2627       Intrinsic::ID IID;
2628       switch (VecWidth) {
2629       default: llvm_unreachable("Unexpected intrinsic");
2630       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2631       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2632       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2633       }
2634 
2635       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2636                                { CI->getOperand(0), CI->getArgOperand(1) });
2637       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2638     } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) {
2639       Type *OpTy = CI->getArgOperand(0)->getType();
2640       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2641       unsigned EltWidth = OpTy->getScalarSizeInBits();
2642       Intrinsic::ID IID;
2643       if (VecWidth == 128 && EltWidth == 32)
2644         IID = Intrinsic::x86_avx512_fpclass_ps_128;
2645       else if (VecWidth == 256 && EltWidth == 32)
2646         IID = Intrinsic::x86_avx512_fpclass_ps_256;
2647       else if (VecWidth == 512 && EltWidth == 32)
2648         IID = Intrinsic::x86_avx512_fpclass_ps_512;
2649       else if (VecWidth == 128 && EltWidth == 64)
2650         IID = Intrinsic::x86_avx512_fpclass_pd_128;
2651       else if (VecWidth == 256 && EltWidth == 64)
2652         IID = Intrinsic::x86_avx512_fpclass_pd_256;
2653       else if (VecWidth == 512 && EltWidth == 64)
2654         IID = Intrinsic::x86_avx512_fpclass_pd_512;
2655       else
2656         llvm_unreachable("Unexpected intrinsic");
2657 
2658       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2659                                { CI->getOperand(0), CI->getArgOperand(1) });
2660       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2661     } else if (IsX86 && Name.starts_with("avx512.cmp.p")) {
2662       SmallVector<Value *, 4> Args(CI->args());
2663       Type *OpTy = Args[0]->getType();
2664       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2665       unsigned EltWidth = OpTy->getScalarSizeInBits();
2666       Intrinsic::ID IID;
2667       if (VecWidth == 128 && EltWidth == 32)
2668         IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2669       else if (VecWidth == 256 && EltWidth == 32)
2670         IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2671       else if (VecWidth == 512 && EltWidth == 32)
2672         IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2673       else if (VecWidth == 128 && EltWidth == 64)
2674         IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2675       else if (VecWidth == 256 && EltWidth == 64)
2676         IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2677       else if (VecWidth == 512 && EltWidth == 64)
2678         IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2679       else
2680         llvm_unreachable("Unexpected intrinsic");
2681 
2682       Value *Mask = Constant::getAllOnesValue(CI->getType());
2683       if (VecWidth == 512)
2684         std::swap(Mask, Args.back());
2685       Args.push_back(Mask);
2686 
2687       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2688                                Args);
2689     } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) {
2690       // Integer compare intrinsics.
2691       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2692       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2693     } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) {
2694       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2695       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2696     } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") ||
2697                          Name.starts_with("avx512.cvtw2mask.") ||
2698                          Name.starts_with("avx512.cvtd2mask.") ||
2699                          Name.starts_with("avx512.cvtq2mask."))) {
2700       Value *Op = CI->getArgOperand(0);
2701       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2702       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2703       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2704     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2705                         Name == "ssse3.pabs.w.128" ||
2706                         Name == "ssse3.pabs.d.128" ||
2707                         Name.starts_with("avx2.pabs") ||
2708                         Name.starts_with("avx512.mask.pabs"))) {
2709       Rep = upgradeAbs(Builder, *CI);
2710     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2711                          Name == "sse2.pmaxs.w" ||
2712                          Name == "sse41.pmaxsd" ||
2713                          Name.starts_with("avx2.pmaxs") ||
2714                          Name.starts_with("avx512.mask.pmaxs"))) {
2715       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2716     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2717                          Name == "sse41.pmaxuw" ||
2718                          Name == "sse41.pmaxud" ||
2719                          Name.starts_with("avx2.pmaxu") ||
2720                          Name.starts_with("avx512.mask.pmaxu"))) {
2721       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2722     } else if (IsX86 && (Name == "sse41.pminsb" ||
2723                          Name == "sse2.pmins.w" ||
2724                          Name == "sse41.pminsd" ||
2725                          Name.starts_with("avx2.pmins") ||
2726                          Name.starts_with("avx512.mask.pmins"))) {
2727       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2728     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2729                          Name == "sse41.pminuw" ||
2730                          Name == "sse41.pminud" ||
2731                          Name.starts_with("avx2.pminu") ||
2732                          Name.starts_with("avx512.mask.pminu"))) {
2733       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2734     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2735                          Name == "avx2.pmulu.dq" ||
2736                          Name == "avx512.pmulu.dq.512" ||
2737                          Name.starts_with("avx512.mask.pmulu.dq."))) {
2738       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2739     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2740                          Name == "avx2.pmul.dq" ||
2741                          Name == "avx512.pmul.dq.512" ||
2742                          Name.starts_with("avx512.mask.pmul.dq."))) {
2743       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2744     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2745                          Name == "sse2.cvtsi2sd" ||
2746                          Name == "sse.cvtsi642ss" ||
2747                          Name == "sse2.cvtsi642sd")) {
2748       Rep = Builder.CreateSIToFP(
2749           CI->getArgOperand(1),
2750           cast<VectorType>(CI->getType())->getElementType());
2751       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2752     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2753       Rep = Builder.CreateUIToFP(
2754           CI->getArgOperand(1),
2755           cast<VectorType>(CI->getType())->getElementType());
2756       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2757     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2758       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2759       Rep = Builder.CreateFPExt(
2760           Rep, cast<VectorType>(CI->getType())->getElementType());
2761       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2762     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2763                          Name == "sse2.cvtdq2ps" ||
2764                          Name == "avx.cvtdq2.pd.256" ||
2765                          Name == "avx.cvtdq2.ps.256" ||
2766                          Name.starts_with("avx512.mask.cvtdq2pd.") ||
2767                          Name.starts_with("avx512.mask.cvtudq2pd.") ||
2768                          Name.starts_with("avx512.mask.cvtdq2ps.") ||
2769                          Name.starts_with("avx512.mask.cvtudq2ps.") ||
2770                          Name.starts_with("avx512.mask.cvtqq2pd.") ||
2771                          Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2772                          Name == "avx512.mask.cvtqq2ps.256" ||
2773                          Name == "avx512.mask.cvtqq2ps.512" ||
2774                          Name == "avx512.mask.cvtuqq2ps.256" ||
2775                          Name == "avx512.mask.cvtuqq2ps.512" ||
2776                          Name == "sse2.cvtps2pd" ||
2777                          Name == "avx.cvt.ps2.pd.256" ||
2778                          Name == "avx512.mask.cvtps2pd.128" ||
2779                          Name == "avx512.mask.cvtps2pd.256")) {
2780       auto *DstTy = cast<FixedVectorType>(CI->getType());
2781       Rep = CI->getArgOperand(0);
2782       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2783 
2784       unsigned NumDstElts = DstTy->getNumElements();
2785       if (NumDstElts < SrcTy->getNumElements()) {
2786         assert(NumDstElts == 2 && "Unexpected vector size");
2787         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2788       }
2789 
2790       bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2791       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2792       if (IsPS2PD)
2793         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2794       else if (CI->arg_size() == 4 &&
2795                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2796                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2797         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2798                                        : Intrinsic::x86_avx512_sitofp_round;
2799         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2800                                                 { DstTy, SrcTy });
2801         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2802       } else {
2803         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2804                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2805       }
2806 
2807       if (CI->arg_size() >= 3)
2808         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2809                             CI->getArgOperand(1));
2810     } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2811                          Name.starts_with("vcvtph2ps."))) {
2812       auto *DstTy = cast<FixedVectorType>(CI->getType());
2813       Rep = CI->getArgOperand(0);
2814       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2815       unsigned NumDstElts = DstTy->getNumElements();
2816       if (NumDstElts != SrcTy->getNumElements()) {
2817         assert(NumDstElts == 4 && "Unexpected vector size");
2818         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2819       }
2820       Rep = Builder.CreateBitCast(
2821           Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2822       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2823       if (CI->arg_size() >= 3)
2824         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2825                             CI->getArgOperand(1));
2826     } else if (IsX86 && Name.starts_with("avx512.mask.load")) {
2827       // "avx512.mask.loadu." or "avx512.mask.load."
2828       bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2829       Rep =
2830           UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2831                             CI->getArgOperand(2), Aligned);
2832     } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) {
2833       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2834       Type *PtrTy = ResultTy->getElementType();
2835 
2836       // Cast the pointer to element type.
2837       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2838                                          llvm::PointerType::getUnqual(PtrTy));
2839 
2840       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2841                                      ResultTy->getNumElements());
2842 
2843       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2844                                                 Intrinsic::masked_expandload,
2845                                                 ResultTy);
2846       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2847     } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) {
2848       auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2849       Type *PtrTy = ResultTy->getElementType();
2850 
2851       // Cast the pointer to element type.
2852       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2853                                          llvm::PointerType::getUnqual(PtrTy));
2854 
2855       Value *MaskVec =
2856           getX86MaskVec(Builder, CI->getArgOperand(2),
2857                         cast<FixedVectorType>(ResultTy)->getNumElements());
2858 
2859       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2860                                                 Intrinsic::masked_compressstore,
2861                                                 ResultTy);
2862       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2863     } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") ||
2864                          Name.starts_with("avx512.mask.expand."))) {
2865       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2866 
2867       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2868                                      ResultTy->getNumElements());
2869 
2870       bool IsCompress = Name[12] == 'c';
2871       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2872                                      : Intrinsic::x86_avx512_mask_expand;
2873       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2874       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2875                                        MaskVec });
2876     } else if (IsX86 && Name.starts_with("xop.vpcom")) {
2877       bool IsSigned;
2878       if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2879           Name.ends_with("uq"))
2880         IsSigned = false;
2881       else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") ||
2882                Name.ends_with("q"))
2883         IsSigned = true;
2884       else
2885         llvm_unreachable("Unknown suffix");
2886 
2887       unsigned Imm;
2888       if (CI->arg_size() == 3) {
2889         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2890       } else {
2891         Name = Name.substr(9); // strip off "xop.vpcom"
2892         if (Name.starts_with("lt"))
2893           Imm = 0;
2894         else if (Name.starts_with("le"))
2895           Imm = 1;
2896         else if (Name.starts_with("gt"))
2897           Imm = 2;
2898         else if (Name.starts_with("ge"))
2899           Imm = 3;
2900         else if (Name.starts_with("eq"))
2901           Imm = 4;
2902         else if (Name.starts_with("ne"))
2903           Imm = 5;
2904         else if (Name.starts_with("false"))
2905           Imm = 6;
2906         else if (Name.starts_with("true"))
2907           Imm = 7;
2908         else
2909           llvm_unreachable("Unknown condition");
2910       }
2911 
2912       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2913     } else if (IsX86 && Name.starts_with("xop.vpcmov")) {
2914       Value *Sel = CI->getArgOperand(2);
2915       Value *NotSel = Builder.CreateNot(Sel);
2916       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2917       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2918       Rep = Builder.CreateOr(Sel0, Sel1);
2919     } else if (IsX86 && (Name.starts_with("xop.vprot") ||
2920                          Name.starts_with("avx512.prol") ||
2921                          Name.starts_with("avx512.mask.prol"))) {
2922       Rep = upgradeX86Rotate(Builder, *CI, false);
2923     } else if (IsX86 && (Name.starts_with("avx512.pror") ||
2924                          Name.starts_with("avx512.mask.pror"))) {
2925       Rep = upgradeX86Rotate(Builder, *CI, true);
2926     } else if (IsX86 && (Name.starts_with("avx512.vpshld.") ||
2927                          Name.starts_with("avx512.mask.vpshld") ||
2928                          Name.starts_with("avx512.maskz.vpshld"))) {
2929       bool ZeroMask = Name[11] == 'z';
2930       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2931     } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") ||
2932                          Name.starts_with("avx512.mask.vpshrd") ||
2933                          Name.starts_with("avx512.maskz.vpshrd"))) {
2934       bool ZeroMask = Name[11] == 'z';
2935       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2936     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2937       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2938                                                Intrinsic::x86_sse42_crc32_32_8);
2939       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2940       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2941       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2942     } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") ||
2943                          Name.starts_with("avx512.vbroadcast.s"))) {
2944       // Replace broadcasts with a series of insertelements.
2945       auto *VecTy = cast<FixedVectorType>(CI->getType());
2946       Type *EltTy = VecTy->getElementType();
2947       unsigned EltNum = VecTy->getNumElements();
2948       Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2949       Type *I32Ty = Type::getInt32Ty(C);
2950       Rep = PoisonValue::get(VecTy);
2951       for (unsigned I = 0; I < EltNum; ++I)
2952         Rep = Builder.CreateInsertElement(Rep, Load,
2953                                           ConstantInt::get(I32Ty, I));
2954     } else if (IsX86 && (Name.starts_with("sse41.pmovsx") ||
2955                          Name.starts_with("sse41.pmovzx") ||
2956                          Name.starts_with("avx2.pmovsx") ||
2957                          Name.starts_with("avx2.pmovzx") ||
2958                          Name.starts_with("avx512.mask.pmovsx") ||
2959                          Name.starts_with("avx512.mask.pmovzx"))) {
2960       auto *DstTy = cast<FixedVectorType>(CI->getType());
2961       unsigned NumDstElts = DstTy->getNumElements();
2962 
2963       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2964       SmallVector<int, 8> ShuffleMask(NumDstElts);
2965       for (unsigned i = 0; i != NumDstElts; ++i)
2966         ShuffleMask[i] = i;
2967 
2968       Value *SV =
2969           Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2970 
2971       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2972       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2973                    : Builder.CreateZExt(SV, DstTy);
2974       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2975       if (CI->arg_size() == 3)
2976         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2977                             CI->getArgOperand(1));
2978     } else if (Name == "avx512.mask.pmov.qd.256" ||
2979                Name == "avx512.mask.pmov.qd.512" ||
2980                Name == "avx512.mask.pmov.wb.256" ||
2981                Name == "avx512.mask.pmov.wb.512") {
2982       Type *Ty = CI->getArgOperand(1)->getType();
2983       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2984       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2985                           CI->getArgOperand(1));
2986     } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") ||
2987                          Name == "avx2.vbroadcasti128")) {
2988       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2989       Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2990       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2991       auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2992       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2993                                             PointerType::getUnqual(VT));
2994       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2995       if (NumSrcElts == 2)
2996         Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2997       else
2998         Rep = Builder.CreateShuffleVector(
2999             Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3000     } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") ||
3001                          Name.starts_with("avx512.mask.shuf.f"))) {
3002       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3003       Type *VT = CI->getType();
3004       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3005       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3006       unsigned ControlBitsMask = NumLanes - 1;
3007       unsigned NumControlBits = NumLanes / 2;
3008       SmallVector<int, 8> ShuffleMask(0);
3009 
3010       for (unsigned l = 0; l != NumLanes; ++l) {
3011         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3012         // We actually need the other source.
3013         if (l >= NumLanes / 2)
3014           LaneMask += NumLanes;
3015         for (unsigned i = 0; i != NumElementsInLane; ++i)
3016           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3017       }
3018       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3019                                         CI->getArgOperand(1), ShuffleMask);
3020       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3021                           CI->getArgOperand(3));
3022     }else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") ||
3023                          Name.starts_with("avx512.mask.broadcasti"))) {
3024       unsigned NumSrcElts =
3025           cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3026               ->getNumElements();
3027       unsigned NumDstElts =
3028           cast<FixedVectorType>(CI->getType())->getNumElements();
3029 
3030       SmallVector<int, 8> ShuffleMask(NumDstElts);
3031       for (unsigned i = 0; i != NumDstElts; ++i)
3032         ShuffleMask[i] = i % NumSrcElts;
3033 
3034       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3035                                         CI->getArgOperand(0),
3036                                         ShuffleMask);
3037       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3038                           CI->getArgOperand(1));
3039     } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") ||
3040                          Name.starts_with("avx2.vbroadcast") ||
3041                          Name.starts_with("avx512.pbroadcast") ||
3042                          Name.starts_with("avx512.mask.broadcast.s"))) {
3043       // Replace vp?broadcasts with a vector shuffle.
3044       Value *Op = CI->getArgOperand(0);
3045       ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3046       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3047       SmallVector<int, 8> M;
3048       ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3049       Rep = Builder.CreateShuffleVector(Op, M);
3050 
3051       if (CI->arg_size() == 3)
3052         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3053                             CI->getArgOperand(1));
3054     } else if (IsX86 && (Name.starts_with("sse2.padds.") ||
3055                          Name.starts_with("avx2.padds.") ||
3056                          Name.starts_with("avx512.padds.") ||
3057                          Name.starts_with("avx512.mask.padds."))) {
3058       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3059     } else if (IsX86 && (Name.starts_with("sse2.psubs.") ||
3060                          Name.starts_with("avx2.psubs.") ||
3061                          Name.starts_with("avx512.psubs.") ||
3062                          Name.starts_with("avx512.mask.psubs."))) {
3063       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3064     } else if (IsX86 && (Name.starts_with("sse2.paddus.") ||
3065                          Name.starts_with("avx2.paddus.") ||
3066                          Name.starts_with("avx512.mask.paddus."))) {
3067       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3068     } else if (IsX86 && (Name.starts_with("sse2.psubus.") ||
3069                          Name.starts_with("avx2.psubus.") ||
3070                          Name.starts_with("avx512.mask.psubus."))) {
3071       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3072     } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) {
3073       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3074                                       CI->getArgOperand(1),
3075                                       CI->getArgOperand(2),
3076                                       CI->getArgOperand(3),
3077                                       CI->getArgOperand(4),
3078                                       false);
3079     } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) {
3080       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3081                                       CI->getArgOperand(1),
3082                                       CI->getArgOperand(2),
3083                                       CI->getArgOperand(3),
3084                                       CI->getArgOperand(4),
3085                                       true);
3086     } else if (IsX86 && (Name == "sse2.psll.dq" ||
3087                          Name == "avx2.psll.dq")) {
3088       // 128/256-bit shift left specified in bits.
3089       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3090       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3091                                        Shift / 8); // Shift is in bits.
3092     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3093                          Name == "avx2.psrl.dq")) {
3094       // 128/256-bit shift right specified in bits.
3095       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3096       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3097                                        Shift / 8); // Shift is in bits.
3098     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3099                          Name == "avx2.psll.dq.bs" ||
3100                          Name == "avx512.psll.dq.512")) {
3101       // 128/256/512-bit shift left specified in bytes.
3102       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3103       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3104     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3105                          Name == "avx2.psrl.dq.bs" ||
3106                          Name == "avx512.psrl.dq.512")) {
3107       // 128/256/512-bit shift right specified in bytes.
3108       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3109       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3110     } else if (IsX86 && (Name == "sse41.pblendw" ||
3111                          Name.starts_with("sse41.blendp") ||
3112                          Name.starts_with("avx.blend.p") ||
3113                          Name == "avx2.pblendw" ||
3114                          Name.starts_with("avx2.pblendd."))) {
3115       Value *Op0 = CI->getArgOperand(0);
3116       Value *Op1 = CI->getArgOperand(1);
3117       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3118       auto *VecTy = cast<FixedVectorType>(CI->getType());
3119       unsigned NumElts = VecTy->getNumElements();
3120 
3121       SmallVector<int, 16> Idxs(NumElts);
3122       for (unsigned i = 0; i != NumElts; ++i)
3123         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
3124 
3125       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3126     } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") ||
3127                          Name == "avx2.vinserti128" ||
3128                          Name.starts_with("avx512.mask.insert"))) {
3129       Value *Op0 = CI->getArgOperand(0);
3130       Value *Op1 = CI->getArgOperand(1);
3131       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3132       unsigned DstNumElts =
3133           cast<FixedVectorType>(CI->getType())->getNumElements();
3134       unsigned SrcNumElts =
3135           cast<FixedVectorType>(Op1->getType())->getNumElements();
3136       unsigned Scale = DstNumElts / SrcNumElts;
3137 
3138       // Mask off the high bits of the immediate value; hardware ignores those.
3139       Imm = Imm % Scale;
3140 
3141       // Extend the second operand into a vector the size of the destination.
3142       SmallVector<int, 8> Idxs(DstNumElts);
3143       for (unsigned i = 0; i != SrcNumElts; ++i)
3144         Idxs[i] = i;
3145       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3146         Idxs[i] = SrcNumElts;
3147       Rep = Builder.CreateShuffleVector(Op1, Idxs);
3148 
3149       // Insert the second operand into the first operand.
3150 
3151       // Note that there is no guarantee that instruction lowering will actually
3152       // produce a vinsertf128 instruction for the created shuffles. In
3153       // particular, the 0 immediate case involves no lane changes, so it can
3154       // be handled as a blend.
3155 
3156       // Example of shuffle mask for 32-bit elements:
3157       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
3158       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
3159 
      // First fill with identity mask.
3161       for (unsigned i = 0; i != DstNumElts; ++i)
3162         Idxs[i] = i;
3163       // Then replace the elements where we need to insert.
3164       for (unsigned i = 0; i != SrcNumElts; ++i)
3165         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3166       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3167 
3168       // If the intrinsic has a mask operand, handle that.
3169       if (CI->arg_size() == 5)
3170         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
3171                             CI->getArgOperand(3));
3172     } else if (IsX86 && (Name.starts_with("avx.vextractf128.") ||
3173                          Name == "avx2.vextracti128" ||
3174                          Name.starts_with("avx512.mask.vextract"))) {
3175       Value *Op0 = CI->getArgOperand(0);
3176       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3177       unsigned DstNumElts =
3178           cast<FixedVectorType>(CI->getType())->getNumElements();
3179       unsigned SrcNumElts =
3180           cast<FixedVectorType>(Op0->getType())->getNumElements();
3181       unsigned Scale = SrcNumElts / DstNumElts;
3182 
3183       // Mask off the high bits of the immediate value; hardware ignores those.
3184       Imm = Imm % Scale;
3185 
3186       // Get indexes for the subvector of the input vector.
3187       SmallVector<int, 8> Idxs(DstNumElts);
3188       for (unsigned i = 0; i != DstNumElts; ++i) {
3189         Idxs[i] = i + (Imm * DstNumElts);
3190       }
3191       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3192 
3193       // If the intrinsic has a mask operand, handle that.
3194       if (CI->arg_size() == 4)
3195         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3196                             CI->getArgOperand(2));
3197     } else if (!IsX86 && Name == "stackprotectorcheck") {
3198       Rep = nullptr;
3199     } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") ||
3200                          Name.starts_with("avx512.mask.perm.di."))) {
3201       Value *Op0 = CI->getArgOperand(0);
3202       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3203       auto *VecTy = cast<FixedVectorType>(CI->getType());
3204       unsigned NumElts = VecTy->getNumElements();
3205 
3206       SmallVector<int, 8> Idxs(NumElts);
3207       for (unsigned i = 0; i != NumElts; ++i)
3208         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3209 
3210       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3211 
3212       if (CI->arg_size() == 4)
3213         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3214                             CI->getArgOperand(2));
    } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      //    [1:0] - select 128 bits from sources for low half of destination
      //    [2]   - ignore
      //    [3]   - zero low half of destination
      //    [5:4] - select 128 bits from sources for high half of destination
      //    [6]   - ignore
      //    [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<int, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      // Bit 1 (resp. bit 5) picks the source for the low (resp. high) half.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      // One two-source shufflevector on the (possibly zeroed) selected
      // sources implements the whole permute.
      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);

    } else if (IsX86 && (Name.starts_with("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.starts_with("avx512.mask.vpermil.p") ||
                         Name.starts_with("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<int, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // Masked variants (4 args) merge with passthru (arg 2) under mask
      // (arg 3).
      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.starts_with("avx512.mask.pshufl.w."))) {
      // PSHUFLW: shuffle the low 4 words of each 128-bit (8-word) lane by
      // the 2-bit fields of Imm; the high 4 words pass through unchanged.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.starts_with("avx512.mask.pshufh.w."))) {
      // PSHUFHW: the mirror image — low 4 words of each lane pass through,
      // high 4 words are shuffled by the 2-bit fields of Imm.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element. By adding HalfLaneElts bits from
        // the immediate. Wrapping around the immediate every 8-bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      // Merge with passthru (arg 3) under mask (arg 4).
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") ||
                         Name.starts_with("avx512.mask.movshdup") ||
                         Name.starts_with("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      // movshdup duplicates the odd-index element of each pair (Offset 1);
      // movddup/movsldup duplicate the even-index element (Offset 0).
      unsigned Offset = 0;
      if (Name.starts_with("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") ||
                         Name.starts_with("avx512.mask.unpckl."))) {
      // unpcklo: within each 128-bit lane, interleave the low halves of the
      // two sources (a0 b0 a1 b1 ...).
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") ||
                         Name.starts_with("avx512.mask.unpckh."))) {
      // unpckhi: same as above but starting from the high half of each lane.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.starts_with("avx512.mask.and.") ||
                         Name.starts_with("avx512.mask.pand."))) {
      // Bitwise ops on (possibly FP) vectors: bitcast to the same-width
      // integer vector, perform the op, cast back, then masked select.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") ||
                         Name.starts_with("avx512.mask.pandn."))) {
      // andn computes ~arg0 & arg1.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.starts_with("avx512.mask.or.") ||
                         Name.starts_with("avx512.mask.por."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") ||
                         Name.starts_with("avx512.mask.pxor."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) {
      // Masked integer add: plain IR add, then merge with passthru (arg 2)
      // under mask (arg 3).
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) {
      // Masked integer subtract, same select pattern.
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) {
      // Masked low-half integer multiply (pmull*), same select pattern.
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) {
      // 512-bit forms carry a rounding-mode operand (arg 4) and must stay a
      // target intrinsic; narrower forms lower to plain IR fadd.
      // Name[17] is 's' for the ps variant, otherwise pd.
      if (Name.ends_with(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) {
      // Same structure as add.p above, for fdiv.
      if (Name.ends_with(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) {
      // Same structure as add.p above, for fmul.
      if (Name.ends_with(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) {
      // Same structure as add.p above, for fsub.
      if (Name.ends_with(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") ||
                         Name.starts_with("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      // Only the 512-bit forms (which carry an extra operand, arg 4) are
      // handled here; pick the intrinsic from a [IsMin][IsDouble] table.
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i';
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) {
      // lzcnt -> generic ctlz with the second operand false (result is
      // defined for a zero input), then masked select.
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.starts_with("avx512.mask.psll")) {
      // Name[16] is the first character after "avx512.mask.psll": an 'i'
      // there (or at [18]) marks an immediate shift count, 'v' a variable
      // (per-element) shift. Size is the element-type letter found just
      // after the first '.' that follows the mnemonic.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Variable-count shifts encode the element count in the name.
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.ends_with(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.ends_with(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        // 512-bit forms.
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      // Helper emits the shift intrinsic plus the masked select.
      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) {
      // Same name-decoding scheme as the psll arm above, for logical
      // right shifts.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.ends_with(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.ends_with(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.starts_with("avx512.mask.psra")) {
      // Same name-decoding scheme as the psll/psrl arms above, for
      // arithmetic right shifts. Note that psra.q has no SSE2/AVX2
      // counterpart, so its 128/256-bit forms use AVX-512 intrinsics.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.ends_with(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.ends_with(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) {
      // Masked scalar move; delegated to the helper.
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) {
      // cvtmask2*; delegated to the UpgradeMaskToInt helper.
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.ends_with(".movntdqa")) {
      // Non-temporal load: emit a regular aligned load tagged with
      // !nontemporal metadata (value 1).
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(
          Ptr, PointerType::getUnqual(CI->getType()), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(
          CI->getType(), BC,
          Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
      LI->setMetadata(LLVMContext::MD_nontemporal, Node);
      Rep = LI;
    } else if (IsX86 && (Name.starts_with("fma.vfmadd.") ||
                         Name.starts_with("fma.vfmsub.") ||
                         Name.starts_with("fma.vfnmadd.") ||
                         Name.starts_with("fma.vfnmsub."))) {
      // Decode the operation from the name: 'n' negates the product, 's'
      // (sub) negates the addend, and a scalar suffix restricts the fma to
      // element 0.
      bool NegMul = Name[6] == 'n';
      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      if (IsScalar) {
        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
      }

      // For the scalar form the negation is applied to operand 1 instead of
      // operand 0; the product is the same either way.
      if (NegMul && !IsScalar)
        Ops[0] = Builder.CreateFNeg(Ops[0]);
      if (NegMul && IsScalar)
        Ops[1] = Builder.CreateFNeg(Ops[1]);
      if (NegAcc)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      // Scalar form: insert the result back into element 0 of operand 0.
      if (IsScalar)
        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
                                          (uint64_t)0);
    } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) {
      // FMA4 scalar fmadd: compute on element 0 and insert into a zero
      // vector (the upper elements become zero).
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") ||
                         Name.starts_with("avx512.maskz.vfmadd.s") ||
                         Name.starts_with("avx512.mask3.vfmadd.s") ||
                         Name.starts_with("avx512.mask3.vfmsub.s") ||
                         Name.starts_with("avx512.mask3.vfnmsub.s"))) {
      // Name[11] is the character after "avx512.mask": '3' and 'z' mark the
      // mask3/maskz variants.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      // Negate one multiplicand; which one depends on the variant because
      // the un-negated operand is reused as the passthru/insert base below.
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);

      // A rounding operand (arg 4) other than 4 (_MM_FROUND_CUR_DIRECTION)
      // requires the target-specific scalar FMA intrinsic; otherwise a
      // generic llvm.fma suffices.
      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };

        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;

      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);

      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
3853     } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") ||
3854                          Name.starts_with("avx512.mask.vfnmadd.p") ||
3855                          Name.starts_with("avx512.mask.vfnmsub.p") ||
3856                          Name.starts_with("avx512.mask3.vfmadd.p") ||
3857                          Name.starts_with("avx512.mask3.vfmsub.p") ||
3858                          Name.starts_with("avx512.mask3.vfnmsub.p") ||
3859                          Name.starts_with("avx512.maskz.vfmadd.p"))) {
3860       bool IsMask3 = Name[11] == '3';
3861       bool IsMaskZ = Name[11] == 'z';
3862       // Drop the "avx512.mask." to make it easier.
3863       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3864       bool NegMul = Name[2] == 'n';
3865       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3866 
3867       Value *A = CI->getArgOperand(0);
3868       Value *B = CI->getArgOperand(1);
3869       Value *C = CI->getArgOperand(2);
3870 
3871       if (NegMul && (IsMask3 || IsMaskZ))
3872         A = Builder.CreateFNeg(A);
3873       if (NegMul && !(IsMask3 || IsMaskZ))
3874         B = Builder.CreateFNeg(B);
3875       if (NegAcc)
3876         C = Builder.CreateFNeg(C);
3877 
3878       if (CI->arg_size() == 5 &&
3879           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3880            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3881         Intrinsic::ID IID;
3882         // Check the character before ".512" in string.
3883         if (Name[Name.size()-5] == 's')
3884           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3885         else
3886           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3887 
3888         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3889                                  { A, B, C, CI->getArgOperand(4) });
3890       } else {
3891         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3892                                                   Intrinsic::fma,
3893                                                   A->getType());
3894         Rep = Builder.CreateCall(FMA, { A, B, C });
3895       }
3896 
3897       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3898                         IsMask3 ? CI->getArgOperand(2) :
3899                                   CI->getArgOperand(0);
3900 
3901       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3902     } else if (IsX86 &&  Name.starts_with("fma.vfmsubadd.p")) {
3903       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3904       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3905       Intrinsic::ID IID;
3906       if (VecWidth == 128 && EltWidth == 32)
3907         IID = Intrinsic::x86_fma_vfmaddsub_ps;
3908       else if (VecWidth == 256 && EltWidth == 32)
3909         IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3910       else if (VecWidth == 128 && EltWidth == 64)
3911         IID = Intrinsic::x86_fma_vfmaddsub_pd;
3912       else if (VecWidth == 256 && EltWidth == 64)
3913         IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3914       else
3915         llvm_unreachable("Unexpected intrinsic");
3916 
3917       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3918                        CI->getArgOperand(2) };
3919       Ops[2] = Builder.CreateFNeg(Ops[2]);
3920       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3921                                Ops);
3922     } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3923                          Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3924                          Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3925                          Name.starts_with("avx512.mask3.vfmsubadd.p"))) {
3926       bool IsMask3 = Name[11] == '3';
3927       bool IsMaskZ = Name[11] == 'z';
3928       // Drop the "avx512.mask." to make it easier.
3929       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3930       bool IsSubAdd = Name[3] == 's';
3931       if (CI->arg_size() == 5) {
3932         Intrinsic::ID IID;
3933         // Check the character before ".512" in string.
3934         if (Name[Name.size()-5] == 's')
3935           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3936         else
3937           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3938 
3939         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3940                          CI->getArgOperand(2), CI->getArgOperand(4) };
3941         if (IsSubAdd)
3942           Ops[2] = Builder.CreateFNeg(Ops[2]);
3943 
3944         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3945                                  Ops);
3946       } else {
3947         int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3948 
3949         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3950                          CI->getArgOperand(2) };
3951 
3952         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3953                                                   Ops[0]->getType());
3954         Value *Odd = Builder.CreateCall(FMA, Ops);
3955         Ops[2] = Builder.CreateFNeg(Ops[2]);
3956         Value *Even = Builder.CreateCall(FMA, Ops);
3957 
3958         if (IsSubAdd)
3959           std::swap(Even, Odd);
3960 
3961         SmallVector<int, 32> Idxs(NumElts);
3962         for (int i = 0; i != NumElts; ++i)
3963           Idxs[i] = i + (i % 2) * NumElts;
3964 
3965         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3966       }
3967 
3968       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3969                         IsMask3 ? CI->getArgOperand(2) :
3970                                   CI->getArgOperand(0);
3971 
3972       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3973     } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") ||
3974                          Name.starts_with("avx512.maskz.pternlog."))) {
3975       bool ZeroMask = Name[11] == 'z';
3976       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3977       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3978       Intrinsic::ID IID;
3979       if (VecWidth == 128 && EltWidth == 32)
3980         IID = Intrinsic::x86_avx512_pternlog_d_128;
3981       else if (VecWidth == 256 && EltWidth == 32)
3982         IID = Intrinsic::x86_avx512_pternlog_d_256;
3983       else if (VecWidth == 512 && EltWidth == 32)
3984         IID = Intrinsic::x86_avx512_pternlog_d_512;
3985       else if (VecWidth == 128 && EltWidth == 64)
3986         IID = Intrinsic::x86_avx512_pternlog_q_128;
3987       else if (VecWidth == 256 && EltWidth == 64)
3988         IID = Intrinsic::x86_avx512_pternlog_q_256;
3989       else if (VecWidth == 512 && EltWidth == 64)
3990         IID = Intrinsic::x86_avx512_pternlog_q_512;
3991       else
3992         llvm_unreachable("Unexpected intrinsic");
3993 
3994       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3995                         CI->getArgOperand(2), CI->getArgOperand(3) };
3996       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3997                                Args);
3998       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3999                                  : CI->getArgOperand(0);
4000       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4001     } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") ||
4002                          Name.starts_with("avx512.maskz.vpmadd52"))) {
4003       bool ZeroMask = Name[11] == 'z';
4004       bool High = Name[20] == 'h' || Name[21] == 'h';
4005       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4006       Intrinsic::ID IID;
4007       if (VecWidth == 128 && !High)
4008         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4009       else if (VecWidth == 256 && !High)
4010         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4011       else if (VecWidth == 512 && !High)
4012         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4013       else if (VecWidth == 128 && High)
4014         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4015       else if (VecWidth == 256 && High)
4016         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4017       else if (VecWidth == 512 && High)
4018         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4019       else
4020         llvm_unreachable("Unexpected intrinsic");
4021 
4022       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4023                         CI->getArgOperand(2) };
4024       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4025                                Args);
4026       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4027                                  : CI->getArgOperand(0);
4028       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4029     } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") ||
4030                          Name.starts_with("avx512.mask.vpermt2var.") ||
4031                          Name.starts_with("avx512.maskz.vpermt2var."))) {
4032       bool ZeroMask = Name[11] == 'z';
4033       bool IndexForm = Name[17] == 'i';
4034       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4035     } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") ||
4036                          Name.starts_with("avx512.maskz.vpdpbusd.") ||
4037                          Name.starts_with("avx512.mask.vpdpbusds.") ||
4038                          Name.starts_with("avx512.maskz.vpdpbusds."))) {
4039       bool ZeroMask = Name[11] == 'z';
4040       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4041       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4042       Intrinsic::ID IID;
4043       if (VecWidth == 128 && !IsSaturating)
4044         IID = Intrinsic::x86_avx512_vpdpbusd_128;
4045       else if (VecWidth == 256 && !IsSaturating)
4046         IID = Intrinsic::x86_avx512_vpdpbusd_256;
4047       else if (VecWidth == 512 && !IsSaturating)
4048         IID = Intrinsic::x86_avx512_vpdpbusd_512;
4049       else if (VecWidth == 128 && IsSaturating)
4050         IID = Intrinsic::x86_avx512_vpdpbusds_128;
4051       else if (VecWidth == 256 && IsSaturating)
4052         IID = Intrinsic::x86_avx512_vpdpbusds_256;
4053       else if (VecWidth == 512 && IsSaturating)
4054         IID = Intrinsic::x86_avx512_vpdpbusds_512;
4055       else
4056         llvm_unreachable("Unexpected intrinsic");
4057 
4058       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4059                         CI->getArgOperand(2)  };
4060       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4061                                Args);
4062       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4063                                  : CI->getArgOperand(0);
4064       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4065     } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") ||
4066                          Name.starts_with("avx512.maskz.vpdpwssd.") ||
4067                          Name.starts_with("avx512.mask.vpdpwssds.") ||
4068                          Name.starts_with("avx512.maskz.vpdpwssds."))) {
4069       bool ZeroMask = Name[11] == 'z';
4070       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4071       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4072       Intrinsic::ID IID;
4073       if (VecWidth == 128 && !IsSaturating)
4074         IID = Intrinsic::x86_avx512_vpdpwssd_128;
4075       else if (VecWidth == 256 && !IsSaturating)
4076         IID = Intrinsic::x86_avx512_vpdpwssd_256;
4077       else if (VecWidth == 512 && !IsSaturating)
4078         IID = Intrinsic::x86_avx512_vpdpwssd_512;
4079       else if (VecWidth == 128 && IsSaturating)
4080         IID = Intrinsic::x86_avx512_vpdpwssds_128;
4081       else if (VecWidth == 256 && IsSaturating)
4082         IID = Intrinsic::x86_avx512_vpdpwssds_256;
4083       else if (VecWidth == 512 && IsSaturating)
4084         IID = Intrinsic::x86_avx512_vpdpwssds_512;
4085       else
4086         llvm_unreachable("Unexpected intrinsic");
4087 
4088       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4089                         CI->getArgOperand(2)  };
4090       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4091                                Args);
4092       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4093                                  : CI->getArgOperand(0);
4094       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4095     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4096                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
4097                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
4098       Intrinsic::ID IID;
4099       if (Name[0] == 'a' && Name.back() == '2')
4100         IID = Intrinsic::x86_addcarry_32;
4101       else if (Name[0] == 'a' && Name.back() == '4')
4102         IID = Intrinsic::x86_addcarry_64;
4103       else if (Name[0] == 's' && Name.back() == '2')
4104         IID = Intrinsic::x86_subborrow_32;
4105       else if (Name[0] == 's' && Name.back() == '4')
4106         IID = Intrinsic::x86_subborrow_64;
4107       else
4108         llvm_unreachable("Unexpected intrinsic");
4109 
4110       // Make a call with 3 operands.
4111       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4112                         CI->getArgOperand(2)};
4113       Value *NewCall = Builder.CreateCall(
4114                                 Intrinsic::getDeclaration(CI->getModule(), IID),
4115                                 Args);
4116 
4117       // Extract the second result and store it.
4118       Value *Data = Builder.CreateExtractValue(NewCall, 1);
4119       // Cast the pointer to the right type.
4120       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
4121                                  llvm::PointerType::getUnqual(Data->getType()));
4122       Builder.CreateAlignedStore(Data, Ptr, Align(1));
4123       // Replace the original call result with the first result of the new call.
4124       Value *CF = Builder.CreateExtractValue(NewCall, 0);
4125 
4126       CI->replaceAllUsesWith(CF);
4127       Rep = nullptr;
4128     } else if (IsX86 && Name.starts_with("avx512.mask.") &&
4129                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4130       // Rep will be updated by the call in the condition.
4131     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4132       Value *Arg = CI->getArgOperand(0);
4133       Value *Neg = Builder.CreateNeg(Arg, "neg");
4134       Value *Cmp = Builder.CreateICmpSGE(
4135           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4136       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4137     } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4138                           Name.starts_with("atomic.load.add.f64.p"))) {
4139       Value *Ptr = CI->getArgOperand(0);
4140       Value *Val = CI->getArgOperand(1);
4141       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4142                                     AtomicOrdering::SequentiallyConsistent);
4143     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
4144                           Name == "max.ui" || Name == "max.ull")) {
4145       Value *Arg0 = CI->getArgOperand(0);
4146       Value *Arg1 = CI->getArgOperand(1);
4147       Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull")
4148                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4149                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4150       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4151     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
4152                           Name == "min.ui" || Name == "min.ull")) {
4153       Value *Arg0 = CI->getArgOperand(0);
4154       Value *Arg1 = CI->getArgOperand(1);
4155       Value *Cmp = Name.ends_with(".ui") || Name.ends_with(".ull")
4156                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4157                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4158       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4159     } else if (IsNVVM && Name == "clz.ll") {
4160       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4161       Value *Arg = CI->getArgOperand(0);
4162       Value *Ctlz = Builder.CreateCall(
4163           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4164                                     {Arg->getType()}),
4165           {Arg, Builder.getFalse()}, "ctlz");
4166       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4167     } else if (IsNVVM && Name == "popc.ll") {
4168       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4169       // i64.
4170       Value *Arg = CI->getArgOperand(0);
4171       Value *Popc = Builder.CreateCall(
4172           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4173                                     {Arg->getType()}),
4174           Arg, "ctpop");
4175       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4176     } else if (IsNVVM) {
4177       if (Name == "h2f") {
4178         Rep =
4179             Builder.CreateCall(Intrinsic::getDeclaration(
4180                                    F->getParent(), Intrinsic::convert_from_fp16,
4181                                    {Builder.getFloatTy()}),
4182                                CI->getArgOperand(0), "h2f");
4183       } else {
4184         Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
4185         if (IID != Intrinsic::not_intrinsic &&
4186             !F->getReturnType()->getScalarType()->isBFloatTy()) {
4187           rename(F);
4188           NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4189           SmallVector<Value *, 2> Args;
4190           for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4191             Value *Arg = CI->getArgOperand(I);
4192             Type *OldType = Arg->getType();
4193             Type *NewType = NewFn->getArg(I)->getType();
4194             Args.push_back((OldType->isIntegerTy() &&
4195                             NewType->getScalarType()->isBFloatTy())
4196                                ? Builder.CreateBitCast(Arg, NewType)
4197                                : Arg);
4198           }
4199           Rep = Builder.CreateCall(NewFn, Args);
4200           if (F->getReturnType()->isIntegerTy())
4201             Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4202         }
4203       }
4204     } else if (IsARM) {
4205       Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
4206     } else if (IsAMDGCN) {
4207       Rep = UpgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4208     } else {
4209       llvm_unreachable("Unknown function for CallBase upgrade.");
4210     }
4211 
4212     if (Rep)
4213       CI->replaceAllUsesWith(Rep);
4214     CI->eraseFromParent();
4215     return;
4216   }
4217 
4218   const auto &DefaultCase = [&]() -> void {
4219     if (CI->getFunctionType() == NewFn->getFunctionType()) {
4220       // Handle generic mangling change.
4221       assert(
4222           (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4223           "Unknown function for CallBase upgrade and isn't just a name change");
4224       CI->setCalledFunction(NewFn);
4225       return;
4226     }
4227 
4228     // This must be an upgrade from a named to a literal struct.
4229     if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4230       assert(OldST != NewFn->getReturnType() &&
4231              "Return type must have changed");
4232       assert(OldST->getNumElements() ==
4233                  cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4234              "Must have same number of elements");
4235 
4236       SmallVector<Value *> Args(CI->args());
4237       Value *NewCI = Builder.CreateCall(NewFn, Args);
4238       Value *Res = PoisonValue::get(OldST);
4239       for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4240         Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4241         Res = Builder.CreateInsertValue(Res, Elem, Idx);
4242       }
4243       CI->replaceAllUsesWith(Res);
4244       CI->eraseFromParent();
4245       return;
4246     }
4247 
4248     // We're probably about to produce something invalid. Let the verifier catch
4249     // it instead of dying here.
4250     CI->setCalledOperand(
4251         ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4252     return;
4253   };
4254   CallInst *NewCall = nullptr;
4255   switch (NewFn->getIntrinsicID()) {
4256   default: {
4257     DefaultCase();
4258     return;
4259   }
4260   case Intrinsic::arm_neon_vst1:
4261   case Intrinsic::arm_neon_vst2:
4262   case Intrinsic::arm_neon_vst3:
4263   case Intrinsic::arm_neon_vst4:
4264   case Intrinsic::arm_neon_vst2lane:
4265   case Intrinsic::arm_neon_vst3lane:
4266   case Intrinsic::arm_neon_vst4lane: {
4267     SmallVector<Value *, 4> Args(CI->args());
4268     NewCall = Builder.CreateCall(NewFn, Args);
4269     break;
4270   }
4271   case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4272   case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4273   case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4274     LLVMContext &Ctx = F->getParent()->getContext();
4275     SmallVector<Value *, 4> Args(CI->args());
4276     Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4277                                cast<ConstantInt>(Args[3])->getZExtValue());
4278     NewCall = Builder.CreateCall(NewFn, Args);
4279     break;
4280   }
4281   case Intrinsic::aarch64_sve_ld3_sret:
4282   case Intrinsic::aarch64_sve_ld4_sret:
4283   case Intrinsic::aarch64_sve_ld2_sret: {
4284     StringRef Name = F->getName();
4285     Name = Name.substr(5);
4286     unsigned N = StringSwitch<unsigned>(Name)
4287                      .StartsWith("aarch64.sve.ld2", 2)
4288                      .StartsWith("aarch64.sve.ld3", 3)
4289                      .StartsWith("aarch64.sve.ld4", 4)
4290                      .Default(0);
4291     ScalableVectorType *RetTy =
4292         dyn_cast<ScalableVectorType>(F->getReturnType());
4293     unsigned MinElts = RetTy->getMinNumElements() / N;
4294     SmallVector<Value *, 2> Args(CI->args());
4295     Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4296     Value *Ret = llvm::PoisonValue::get(RetTy);
4297     for (unsigned I = 0; I < N; I++) {
4298       Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4299       Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4300       Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4301     }
4302     NewCall = dyn_cast<CallInst>(Ret);
4303     break;
4304   }
4305 
4306   case Intrinsic::coro_end: {
4307     SmallVector<Value *, 3> Args(CI->args());
4308     Args.push_back(ConstantTokenNone::get(CI->getContext()));
4309     NewCall = Builder.CreateCall(NewFn, Args);
4310     break;
4311   }
4312 
4313   case Intrinsic::vector_extract: {
4314     StringRef Name = F->getName();
4315     Name = Name.substr(5); // Strip llvm
4316     if (!Name.starts_with("aarch64.sve.tuple.get")) {
4317       DefaultCase();
4318       return;
4319     }
4320     ScalableVectorType *RetTy =
4321         dyn_cast<ScalableVectorType>(F->getReturnType());
4322     unsigned MinElts = RetTy->getMinNumElements();
4323     unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4324     Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4325     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4326     break;
4327   }
4328 
4329   case Intrinsic::vector_insert: {
4330     StringRef Name = F->getName();
4331     Name = Name.substr(5);
4332     if (!Name.starts_with("aarch64.sve.tuple")) {
4333       DefaultCase();
4334       return;
4335     }
4336     if (Name.starts_with("aarch64.sve.tuple.set")) {
4337       unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4338       ScalableVectorType *Ty =
4339           dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4340       Value *NewIdx =
4341           ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4342       NewCall = Builder.CreateCall(
4343           NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4344       break;
4345     }
4346     if (Name.starts_with("aarch64.sve.tuple.create")) {
4347       unsigned N = StringSwitch<unsigned>(Name)
4348                        .StartsWith("aarch64.sve.tuple.create2", 2)
4349                        .StartsWith("aarch64.sve.tuple.create3", 3)
4350                        .StartsWith("aarch64.sve.tuple.create4", 4)
4351                        .Default(0);
4352       assert(N > 1 && "Create is expected to be between 2-4");
4353       ScalableVectorType *RetTy =
4354           dyn_cast<ScalableVectorType>(F->getReturnType());
4355       Value *Ret = llvm::PoisonValue::get(RetTy);
4356       unsigned MinElts = RetTy->getMinNumElements() / N;
4357       for (unsigned I = 0; I < N; I++) {
4358         Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4359         Value *V = CI->getArgOperand(I);
4360         Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4361       }
4362       NewCall = dyn_cast<CallInst>(Ret);
4363     }
4364     break;
4365   }
4366 
4367   case Intrinsic::arm_neon_bfdot:
4368   case Intrinsic::arm_neon_bfmmla:
4369   case Intrinsic::arm_neon_bfmlalb:
4370   case Intrinsic::arm_neon_bfmlalt:
4371   case Intrinsic::aarch64_neon_bfdot:
4372   case Intrinsic::aarch64_neon_bfmmla:
4373   case Intrinsic::aarch64_neon_bfmlalb:
4374   case Intrinsic::aarch64_neon_bfmlalt: {
4375     SmallVector<Value *, 3> Args;
4376     assert(CI->arg_size() == 3 &&
4377            "Mismatch between function args and call args");
4378     size_t OperandWidth =
4379         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4380     assert((OperandWidth == 64 || OperandWidth == 128) &&
4381            "Unexpected operand width");
4382     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4383     auto Iter = CI->args().begin();
4384     Args.push_back(*Iter++);
4385     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4386     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4387     NewCall = Builder.CreateCall(NewFn, Args);
4388     break;
4389   }
4390 
4391   case Intrinsic::bitreverse:
4392     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4393     break;
4394 
4395   case Intrinsic::ctlz:
4396   case Intrinsic::cttz:
4397     assert(CI->arg_size() == 1 &&
4398            "Mismatch between function args and call args");
4399     NewCall =
4400         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4401     break;
4402 
4403   case Intrinsic::objectsize: {
4404     Value *NullIsUnknownSize =
4405         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4406     Value *Dynamic =
4407         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4408     NewCall = Builder.CreateCall(
4409         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4410     break;
4411   }
4412 
4413   case Intrinsic::ctpop:
4414     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4415     break;
4416 
4417   case Intrinsic::convert_from_fp16:
4418     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4419     break;
4420 
4421   case Intrinsic::dbg_value: {
4422     StringRef Name = F->getName();
4423     Name = Name.substr(5); // Strip llvm.
4424     // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4425     if (Name.starts_with("dbg.addr")) {
4426       DIExpression *Expr = cast<DIExpression>(
4427           cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4428       Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4429       NewCall =
4430           Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4431                                      MetadataAsValue::get(C, Expr)});
4432       break;
4433     }
4434 
4435     // Upgrade from the old version that had an extra offset argument.
4436     assert(CI->arg_size() == 4);
4437     // Drop nonzero offsets instead of attempting to upgrade them.
4438     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4439       if (Offset->isZeroValue()) {
4440         NewCall = Builder.CreateCall(
4441             NewFn,
4442             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4443         break;
4444       }
4445     CI->eraseFromParent();
4446     return;
4447   }
4448 
4449   case Intrinsic::ptr_annotation:
4450     // Upgrade from versions that lacked the annotation attribute argument.
4451     if (CI->arg_size() != 4) {
4452       DefaultCase();
4453       return;
4454     }
4455 
4456     // Create a new call with an added null annotation attribute argument.
4457     NewCall =
4458         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4459                                    CI->getArgOperand(2), CI->getArgOperand(3),
4460                                    Constant::getNullValue(Builder.getPtrTy())});
4461     NewCall->takeName(CI);
4462     CI->replaceAllUsesWith(NewCall);
4463     CI->eraseFromParent();
4464     return;
4465 
4466   case Intrinsic::var_annotation:
4467     // Upgrade from versions that lacked the annotation attribute argument.
4468     if (CI->arg_size() != 4) {
4469       DefaultCase();
4470       return;
4471     }
4472     // Create a new call with an added null annotation attribute argument.
4473     NewCall =
4474         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4475                                    CI->getArgOperand(2), CI->getArgOperand(3),
4476                                    Constant::getNullValue(Builder.getPtrTy())});
4477     NewCall->takeName(CI);
4478     CI->replaceAllUsesWith(NewCall);
4479     CI->eraseFromParent();
4480     return;
4481 
4482   case Intrinsic::riscv_aes32dsi:
4483   case Intrinsic::riscv_aes32dsmi:
4484   case Intrinsic::riscv_aes32esi:
4485   case Intrinsic::riscv_aes32esmi:
4486   case Intrinsic::riscv_sm4ks:
4487   case Intrinsic::riscv_sm4ed: {
4488     // The last argument to these intrinsics used to be i8 and changed to i32.
4489     // The type overload for sm4ks and sm4ed was removed.
4490     Value *Arg2 = CI->getArgOperand(2);
4491     if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4492       return;
4493 
4494     Value *Arg0 = CI->getArgOperand(0);
4495     Value *Arg1 = CI->getArgOperand(1);
4496     if (CI->getType()->isIntegerTy(64)) {
4497       Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4498       Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4499     }
4500 
4501     Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4502                             cast<ConstantInt>(Arg2)->getZExtValue());
4503 
4504     NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4505     Value *Res = NewCall;
4506     if (Res->getType() != CI->getType())
4507       Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4508     NewCall->takeName(CI);
4509     CI->replaceAllUsesWith(Res);
4510     CI->eraseFromParent();
4511     return;
4512   }
4513   case Intrinsic::riscv_sha256sig0:
4514   case Intrinsic::riscv_sha256sig1:
4515   case Intrinsic::riscv_sha256sum0:
4516   case Intrinsic::riscv_sha256sum1:
4517   case Intrinsic::riscv_sm3p0:
4518   case Intrinsic::riscv_sm3p1: {
4519     // The last argument to these intrinsics used to be i8 and changed to i32.
4520     // The type overload for sm4ks and sm4ed was removed.
4521     if (!CI->getType()->isIntegerTy(64))
4522       return;
4523 
4524     Value *Arg =
4525         Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4526 
4527     NewCall = Builder.CreateCall(NewFn, Arg);
4528     Value *Res =
4529         Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4530     NewCall->takeName(CI);
4531     CI->replaceAllUsesWith(Res);
4532     CI->eraseFromParent();
4533     return;
4534   }
4535 
4536   case Intrinsic::x86_xop_vfrcz_ss:
4537   case Intrinsic::x86_xop_vfrcz_sd:
4538     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4539     break;
4540 
4541   case Intrinsic::x86_xop_vpermil2pd:
4542   case Intrinsic::x86_xop_vpermil2ps:
4543   case Intrinsic::x86_xop_vpermil2pd_256:
4544   case Intrinsic::x86_xop_vpermil2ps_256: {
4545     SmallVector<Value *, 4> Args(CI->args());
4546     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4547     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4548     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4549     NewCall = Builder.CreateCall(NewFn, Args);
4550     break;
4551   }
4552 
4553   case Intrinsic::x86_sse41_ptestc:
4554   case Intrinsic::x86_sse41_ptestz:
4555   case Intrinsic::x86_sse41_ptestnzc: {
4556     // The arguments for these intrinsics used to be v4f32, and changed
4557     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4558     // So, the only thing required is a bitcast for both arguments.
4559     // First, check the arguments have the old type.
4560     Value *Arg0 = CI->getArgOperand(0);
4561     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4562       return;
4563 
4564     // Old intrinsic, add bitcasts
4565     Value *Arg1 = CI->getArgOperand(1);
4566 
4567     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4568 
4569     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4570     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4571 
4572     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4573     break;
4574   }
4575 
4576   case Intrinsic::x86_rdtscp: {
4577     // This used to take 1 arguments. If we have no arguments, it is already
4578     // upgraded.
4579     if (CI->getNumOperands() == 0)
4580       return;
4581 
4582     NewCall = Builder.CreateCall(NewFn);
4583     // Extract the second result and store it.
4584     Value *Data = Builder.CreateExtractValue(NewCall, 1);
4585     // Cast the pointer to the right type.
4586     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4587                                  llvm::PointerType::getUnqual(Data->getType()));
4588     Builder.CreateAlignedStore(Data, Ptr, Align(1));
4589     // Replace the original call result with the first result of the new call.
4590     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4591 
4592     NewCall->takeName(CI);
4593     CI->replaceAllUsesWith(TSC);
4594     CI->eraseFromParent();
4595     return;
4596   }
4597 
4598   case Intrinsic::x86_sse41_insertps:
4599   case Intrinsic::x86_sse41_dppd:
4600   case Intrinsic::x86_sse41_dpps:
4601   case Intrinsic::x86_sse41_mpsadbw:
4602   case Intrinsic::x86_avx_dp_ps_256:
4603   case Intrinsic::x86_avx2_mpsadbw: {
4604     // Need to truncate the last argument from i32 to i8 -- this argument models
4605     // an inherently 8-bit immediate operand to these x86 instructions.
4606     SmallVector<Value *, 4> Args(CI->args());
4607 
4608     // Replace the last argument with a trunc.
4609     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4610     NewCall = Builder.CreateCall(NewFn, Args);
4611     break;
4612   }
4613 
4614   case Intrinsic::x86_avx512_mask_cmp_pd_128:
4615   case Intrinsic::x86_avx512_mask_cmp_pd_256:
4616   case Intrinsic::x86_avx512_mask_cmp_pd_512:
4617   case Intrinsic::x86_avx512_mask_cmp_ps_128:
4618   case Intrinsic::x86_avx512_mask_cmp_ps_256:
4619   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4620     SmallVector<Value *, 4> Args(CI->args());
4621     unsigned NumElts =
4622         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4623     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4624 
4625     NewCall = Builder.CreateCall(NewFn, Args);
4626     Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4627 
4628     NewCall->takeName(CI);
4629     CI->replaceAllUsesWith(Res);
4630     CI->eraseFromParent();
4631     return;
4632   }
4633 
4634   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4635   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4636   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4637   case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4638   case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4639   case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4640     SmallVector<Value *, 4> Args(CI->args());
4641     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4642     if (NewFn->getIntrinsicID() ==
4643         Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4644       Args[1] = Builder.CreateBitCast(
4645           Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4646 
4647     NewCall = Builder.CreateCall(NewFn, Args);
4648     Value *Res = Builder.CreateBitCast(
4649         NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4650 
4651     NewCall->takeName(CI);
4652     CI->replaceAllUsesWith(Res);
4653     CI->eraseFromParent();
4654     return;
4655   }
4656   case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4657   case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4658   case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4659     SmallVector<Value *, 4> Args(CI->args());
4660     unsigned NumElts =
4661         cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4662     Args[1] = Builder.CreateBitCast(
4663         Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4664     Args[2] = Builder.CreateBitCast(
4665         Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4666 
4667     NewCall = Builder.CreateCall(NewFn, Args);
4668     break;
4669   }
4670 
4671   case Intrinsic::thread_pointer: {
4672     NewCall = Builder.CreateCall(NewFn, {});
4673     break;
4674   }
4675 
4676   case Intrinsic::memcpy:
4677   case Intrinsic::memmove:
4678   case Intrinsic::memset: {
4679     // We have to make sure that the call signature is what we're expecting.
4680     // We only want to change the old signatures by removing the alignment arg:
4681     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4682     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4683     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4684     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4685     // Note: i8*'s in the above can be any pointer type
4686     if (CI->arg_size() != 5) {
4687       DefaultCase();
4688       return;
4689     }
4690     // Remove alignment argument (3), and add alignment attributes to the
4691     // dest/src pointers.
4692     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4693                       CI->getArgOperand(2), CI->getArgOperand(4)};
4694     NewCall = Builder.CreateCall(NewFn, Args);
4695     AttributeList OldAttrs = CI->getAttributes();
4696     AttributeList NewAttrs = AttributeList::get(
4697         C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4698         {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4699          OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4700     NewCall->setAttributes(NewAttrs);
4701     auto *MemCI = cast<MemIntrinsic>(NewCall);
4702     // All mem intrinsics support dest alignment.
4703     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4704     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4705     // Memcpy/Memmove also support source alignment.
4706     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4707       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4708     break;
4709   }
4710   }
4711   assert(NewCall && "Should have either set this variable or returned through "
4712                     "the default case");
4713   NewCall->takeName(CI);
4714   CI->replaceAllUsesWith(NewCall);
4715   CI->eraseFromParent();
4716 }
4717 
4718 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4719   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4720 
4721   // Check if this function should be upgraded and get the replacement function
4722   // if there is one.
4723   Function *NewFn;
4724   if (UpgradeIntrinsicFunction(F, NewFn)) {
4725     // Replace all users of the old function with the new function or new
4726     // instructions. This is not a range loop because the call is deleted.
4727     for (User *U : make_early_inc_range(F->users()))
4728       if (CallBase *CB = dyn_cast<CallBase>(U))
4729         UpgradeIntrinsicCall(CB, NewFn);
4730 
4731     // Remove old function, no longer used, from the module.
4732     F->eraseFromParent();
4733   }
4734 }
4735 
4736 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4737   const unsigned NumOperands = MD.getNumOperands();
4738   if (NumOperands == 0)
4739     return &MD; // Invalid, punt to a verifier error.
4740 
4741   // Check if the tag uses struct-path aware TBAA format.
4742   if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4743     return &MD;
4744 
4745   auto &Context = MD.getContext();
4746   if (NumOperands == 3) {
4747     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4748     MDNode *ScalarType = MDNode::get(Context, Elts);
4749     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4750     Metadata *Elts2[] = {ScalarType, ScalarType,
4751                          ConstantAsMetadata::get(
4752                              Constant::getNullValue(Type::getInt64Ty(Context))),
4753                          MD.getOperand(2)};
4754     return MDNode::get(Context, Elts2);
4755   }
4756   // Create a MDNode <MD, MD, offset 0>
4757   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4758                                     Type::getInt64Ty(Context)))};
4759   return MDNode::get(Context, Elts);
4760 }
4761 
4762 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4763                                       Instruction *&Temp) {
4764   if (Opc != Instruction::BitCast)
4765     return nullptr;
4766 
4767   Temp = nullptr;
4768   Type *SrcTy = V->getType();
4769   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4770       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4771     LLVMContext &Context = V->getContext();
4772 
4773     // We have no information about target data layout, so we assume that
4774     // the maximum pointer size is 64bit.
4775     Type *MidTy = Type::getInt64Ty(Context);
4776     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4777 
4778     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4779   }
4780 
4781   return nullptr;
4782 }
4783 
4784 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4785   if (Opc != Instruction::BitCast)
4786     return nullptr;
4787 
4788   Type *SrcTy = C->getType();
4789   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4790       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4791     LLVMContext &Context = C->getContext();
4792 
4793     // We have no information about target data layout, so we assume that
4794     // the maximum pointer size is 64bit.
4795     Type *MidTy = Type::getInt64Ty(Context);
4796 
4797     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4798                                      DestTy);
4799   }
4800 
4801   return nullptr;
4802 }
4803 
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  // Honor the -disable-auto-upgrade-debug-info escape hatch.
  if (DisableAutoUpgradeDebugInfo)
    return false;

  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    // Version is current: verify the module. Non-debug-info breakage is
    // fatal; broken debug info alone is diagnosed and then stripped below.
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  // Either the version is out of date or the debug info is malformed: drop
  // all debug info rather than attempting a metadata upgrade.
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
4832 
4833 /// This checks for objc retain release marker which should be upgraded. It
4834 /// returns true if module is modified.
4835 static bool UpgradeRetainReleaseMarker(Module &M) {
4836   bool Changed = false;
4837   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4838   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4839   if (ModRetainReleaseMarker) {
4840     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4841     if (Op) {
4842       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4843       if (ID) {
4844         SmallVector<StringRef, 4> ValueComp;
4845         ID->getString().split(ValueComp, "#");
4846         if (ValueComp.size() == 2) {
4847           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4848           ID = MDString::get(M.getContext(), NewValue);
4849         }
4850         M.addModuleFlag(Module::Error, MarkerKey, ID);
4851         M.eraseNamedMetadata(ModRetainReleaseMarker);
4852         Changed = true;
4853       }
4854     }
4855   }
4856   return Changed;
4857 }
4858 
/// Upgrade direct calls to Objective-C ARC runtime functions (e.g.
/// "objc_retain") in \p M into calls to the corresponding llvm.objc.*
/// intrinsics.
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);

    // Rewrite every direct call to the old function; early_inc iteration is
    // needed because each rewritten call is erased.
    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      // Leave this call site untouched if any argument cast was invalid.
      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the old declaration once no uses remain (some call sites may have
    // been skipped above, in which case the declaration is kept).
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!UpgradeRetainReleaseMarker(M))
    return;

  // Mapping from old ARC runtime function names to their replacement
  // intrinsics.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
4979 
/// Upgrade legacy module-flag metadata in \p M (behavior downgrades, ObjC
/// image-info normalization, Swift version extraction). Returns true if the
/// module was modified.
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  // Only meaningful when HasSwiftVersionFlag is set below.
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    // A well-formed flag has three operands: behavior, ID string, value.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    // Replace this flag's behavior field with B, keeping its ID and value.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
                              Type::getInt32Ty(M.getContext()), B)),
                          MDString::get(M.getContext(), ID->getString()),
                          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with("sign-return-address")) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Removed the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          // Re-join the components with whitespace removed.
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
    // If the higher bits are set, it adds new module flag for swift info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        // Already upgraded to an i8 value: nothing to do for this flag.
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          // Higher bits carry the Swift ABI/major/minor versions; extract
          // them and emit separate module flags after the loop.
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        // Keep only the low byte as the GC flag value, now typed i8.
        Metadata *Ops[3] = {
          ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
          Op->getOperand(1),
          ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
5113 
5114 void llvm::UpgradeSectionAttributes(Module &M) {
5115   auto TrimSpaces = [](StringRef Section) -> std::string {
5116     SmallVector<StringRef, 5> Components;
5117     Section.split(Components, ',');
5118 
5119     SmallString<32> Buffer;
5120     raw_svector_ostream OS(Buffer);
5121 
5122     for (auto Component : Components)
5123       OS << ',' << Component.trim();
5124 
5125     return std::string(OS.str().substr(1));
5126   };
5127 
5128   for (auto &GV : M.globals()) {
5129     if (!GV.hasSection())
5130       continue;
5131 
5132     StringRef Section = GV.getSection();
5133 
5134     if (!Section.starts_with("__DATA, __objc_catlist"))
5135       continue;
5136 
5137     // __DATA, __objc_catlist, regular, no_dead_strip
5138     // __DATA,__objc_catlist,regular,no_dead_strip
5139     GV.setSection(TrimSpaces(Section));
5140   }
5141 }
5142 
5143 namespace {
5144 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5145 // callsites within a function that did not also have the strictfp attribute.
5146 // Since 10.0, if strict FP semantics are needed within a function, the
5147 // function must have the strictfp attribute and all calls within the function
5148 // must also have the strictfp attribute. This latter restriction is
5149 // necessary to prevent unwanted libcall simplification when a function is
5150 // being cloned (such as for inlining).
5151 //
5152 // The "dangling" strictfp attribute usage was only used to prevent constant
5153 // folding and other libcall simplification. The nobuiltin attribute on the
5154 // callsite has the same effect.
5155 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5156   StrictFPUpgradeVisitor() = default;
5157 
5158   void visitCallBase(CallBase &Call) {
5159     if (!Call.isStrictFP())
5160       return;
5161     if (isa<ConstrainedFPIntrinsic>(&Call))
5162       return;
5163     // If we get here, the caller doesn't have the strictfp attribute
5164     // but this callsite does. Replace the strictfp attribute with nobuiltin.
5165     Call.removeFnAttr(Attribute::StrictFP);
5166     Call.addFnAttr(Attribute::NoBuiltin);
5167   }
5168 };
5169 } // namespace
5170 
5171 void llvm::UpgradeFunctionAttributes(Function &F) {
5172   // If a function definition doesn't have the strictfp attribute,
5173   // convert any callsite strictfp attributes to nobuiltin.
5174   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5175     StrictFPUpgradeVisitor SFPV;
5176     SFPV.visit(F);
5177   }
5178 
5179   // Remove all incompatibile attributes from function.
5180   F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5181   for (auto &Arg : F.args())
5182     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5183 }
5184 
5185 static bool isOldLoopArgument(Metadata *MD) {
5186   auto *T = dyn_cast_or_null<MDTuple>(MD);
5187   if (!T)
5188     return false;
5189   if (T->getNumOperands() < 1)
5190     return false;
5191   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5192   if (!S)
5193     return false;
5194   return S->getString().starts_with("llvm.vectorizer.");
5195 }
5196 
5197 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5198   StringRef OldPrefix = "llvm.vectorizer.";
5199   assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5200 
5201   if (OldTag == "llvm.vectorizer.unroll")
5202     return MDString::get(C, "llvm.loop.interleave.count");
5203 
5204   return MDString::get(
5205       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5206              .str());
5207 }
5208 
5209 static Metadata *upgradeLoopArgument(Metadata *MD) {
5210   auto *T = dyn_cast_or_null<MDTuple>(MD);
5211   if (!T)
5212     return MD;
5213   if (T->getNumOperands() < 1)
5214     return MD;
5215   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5216   if (!OldTag)
5217     return MD;
5218   if (!OldTag->getString().starts_with("llvm.vectorizer."))
5219     return MD;
5220 
5221   // This has an old tag.  Upgrade it.
5222   SmallVector<Metadata *, 8> Ops;
5223   Ops.reserve(T->getNumOperands());
5224   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5225   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5226     Ops.push_back(T->getOperand(I));
5227 
5228   return MDTuple::get(T->getContext(), Ops);
5229 }
5230 
5231 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5232   auto *T = dyn_cast<MDTuple>(&N);
5233   if (!T)
5234     return &N;
5235 
5236   if (none_of(T->operands(), isOldLoopArgument))
5237     return &N;
5238 
5239   SmallVector<Metadata *, 8> Ops;
5240   Ops.reserve(T->getNumOperands());
5241   for (Metadata *MD : T->operands())
5242     Ops.push_back(upgradeLoopArgument(MD));
5243 
5244   return MDTuple::get(T->getContext(), Ops);
5245 }
5246 
/// Upgrade the data layout string \p DL for target triple \p TT, returning
/// the (possibly unchanged) layout. Targets below gained new layout
/// components over time; older bitcode lacking them is patched here so it
/// matches current target expectations. Note the checks test the ORIGINAL
/// \p DL while appends mutate \p Res — ordering of the appends is deliberate.
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN are setting the address
  // space of globals to 1.
  if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
      !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isRISCV64()) {
    // Make i32 a native type for 64-bit RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  std::string Res = DL.str();
  // AMDGCN data layout upgrades.
  if (T.isAMDGCN()) {
    // Define address spaces for constants.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // Add missing non-integral declarations.
    // This goes before adding new address spaces to prevent incoherent string
    // values.
    if (!DL.contains("-ni") && !DL.starts_with("ni"))
      Res.append("-ni:7:8:9");
    // Update ni:7 to ni:7:8:9.
    if (DL.ends_with("ni:7"))
      Res.append(":8:9");
    if (DL.ends_with("ni:7:8"))
      Res.append(":9");

    // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
    // resources). An empty data layout has already been upgraded to G1 by now.
    if (!DL.contains("-p7") && !DL.starts_with("p7"))
      Res.append("-p7:160:256:256:32");
    if (!DL.contains("-p8") && !DL.starts_with("p8"))
      Res.append("-p8:128:128");
    if (!DL.contains("-p9") && !DL.starts_with("p9"))
      Res.append("-p9:192:256:256:32");

    return Res;
  }

  // Remaining upgrades are X86-only.
  if (!T.isX86())
    return Res;

  // If the datalayout matches the expected format, add pointer size address
  // spaces to the datalayout. The regex splices them in right after the
  // "e-m:X[-p:32:32]" head of the layout string.
  std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
  if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
    SmallVector<StringRef, 4> Groups;
    Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
    if (R.match(Res, &Groups))
      Res = (Groups[1] + AddrSpaces + Groups[3]).str();
  }

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      // Insert after the last m/p/i-style component at the front of the
      // layout; only well-formed "e..." layouts are touched.
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
5335 
5336 void llvm::UpgradeAttributes(AttrBuilder &B) {
5337   StringRef FramePointer;
5338   Attribute A = B.getAttribute("no-frame-pointer-elim");
5339   if (A.isValid()) {
5340     // The value can be "true" or "false".
5341     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5342     B.removeAttribute("no-frame-pointer-elim");
5343   }
5344   if (B.contains("no-frame-pointer-elim-non-leaf")) {
5345     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5346     if (FramePointer != "all")
5347       FramePointer = "non-leaf";
5348     B.removeAttribute("no-frame-pointer-elim-non-leaf");
5349   }
5350   if (!FramePointer.empty())
5351     B.addAttribute("frame-pointer", FramePointer);
5352 
5353   A = B.getAttribute("null-pointer-is-valid");
5354   if (A.isValid()) {
5355     // The value can be "true" or "false".
5356     bool NullPointerIsValid = A.getValueAsString() == "true";
5357     B.removeAttribute("null-pointer-is-valid");
5358     if (NullPointerIsValid)
5359       B.addAttribute(Attribute::NullPointerIsValid);
5360   }
5361 }
5362 
5363 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5364   // clang.arc.attachedcall bundles are now required to have an operand.
5365   // If they don't, it's okay to drop them entirely: when there is an operand,
5366   // the "attachedcall" is meaningful and required, but without an operand,
5367   // it's just a marker NOP.  Dropping it merely prevents an optimization.
5368   erase_if(Bundles, [&](OperandBundleDef &OBD) {
5369     return OBD.getTag() == "clang.arc.attachedcall" &&
5370            OBD.inputs().empty();
5371   });
5372 }
5373