xref: /freebsd/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/BinaryFormat/Dwarf.h"
19 #include "llvm/IR/AttributeMask.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstVisitor.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/IntrinsicsAArch64.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/IntrinsicsNVPTX.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/IR/IntrinsicsWebAssembly.h"
35 #include "llvm/IR/IntrinsicsX86.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Verifier.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/Regex.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include <cstring>
45 
46 using namespace llvm;
47 
48 static cl::opt<bool>
49     DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
50                                 cl::desc("Disable autoupgrade of debug info"));
51 
rename(GlobalValue * GV)52 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
53 
54 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55 // changed their type from v4f32 to v2i64.
upgradePTESTIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)56 static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
57                                   Function *&NewFn) {
58   // Check whether this is an old version of the function, which received
59   // v4f32 arguments.
60   Type *Arg0Type = F->getFunctionType()->getParamType(0);
61   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62     return false;
63 
64   // Yes, it's old, replace it with new version.
65   rename(F);
66   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67   return true;
68 }
69 
70 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71 // arguments have changed their type from i32 to i8.
upgradeX86IntrinsicsWith8BitMask(Function * F,Intrinsic::ID IID,Function * & NewFn)72 static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
73                                              Function *&NewFn) {
74   // Check that the last argument is an i32.
75   Type *LastArgType = F->getFunctionType()->getParamType(
76      F->getFunctionType()->getNumParams() - 1);
77   if (!LastArgType->isIntegerTy(32))
78     return false;
79 
80   // Move this function aside and map down.
81   rename(F);
82   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83   return true;
84 }
85 
86 // Upgrade the declaration of fp compare intrinsics that change return type
87 // from scalar to vXi1 mask.
upgradeX86MaskedFPCompare(Function * F,Intrinsic::ID IID,Function * & NewFn)88 static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
89                                       Function *&NewFn) {
90   // Check if the return type is a vector.
91   if (F->getReturnType()->isVectorTy())
92     return false;
93 
94   rename(F);
95   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96   return true;
97 }
98 
upgradeX86BF16Intrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)99 static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
100                                     Function *&NewFn) {
101   if (F->getReturnType()->getScalarType()->isBFloatTy())
102     return false;
103 
104   rename(F);
105   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106   return true;
107 }
108 
upgradeX86BF16DPIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)109 static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
110                                       Function *&NewFn) {
111   if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112     return false;
113 
114   rename(F);
115   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116   return true;
117 }
118 
shouldUpgradeX86Intrinsic(Function * F,StringRef Name)119 static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
120   // All of the intrinsics matches below should be marked with which llvm
121   // version started autoupgrading them. At some point in the future we would
122   // like to use this information to remove upgrade code for some older
123   // intrinsics. It is currently undecided how we will determine that future
124   // point.
125   if (Name.consume_front("avx."))
126     return (Name.starts_with("blend.p") ||        // Added in 3.7
127             Name == "cvt.ps2.pd.256" ||           // Added in 3.9
128             Name == "cvtdq2.pd.256" ||            // Added in 3.9
129             Name == "cvtdq2.ps.256" ||            // Added in 7.0
130             Name.starts_with("movnt.") ||         // Added in 3.2
131             Name.starts_with("sqrt.p") ||         // Added in 7.0
132             Name.starts_with("storeu.") ||        // Added in 3.9
133             Name.starts_with("vbroadcast.s") ||   // Added in 3.5
134             Name.starts_with("vbroadcastf128") || // Added in 4.0
135             Name.starts_with("vextractf128.") ||  // Added in 3.7
136             Name.starts_with("vinsertf128.") ||   // Added in 3.7
137             Name.starts_with("vperm2f128.") ||    // Added in 6.0
138             Name.starts_with("vpermil."));        // Added in 3.1
139 
140   if (Name.consume_front("avx2."))
141     return (Name == "movntdqa" ||             // Added in 5.0
142             Name.starts_with("pabs.") ||      // Added in 6.0
143             Name.starts_with("padds.") ||     // Added in 8.0
144             Name.starts_with("paddus.") ||    // Added in 8.0
145             Name.starts_with("pblendd.") ||   // Added in 3.7
146             Name == "pblendw" ||              // Added in 3.7
147             Name.starts_with("pbroadcast") || // Added in 3.8
148             Name.starts_with("pcmpeq.") ||    // Added in 3.1
149             Name.starts_with("pcmpgt.") ||    // Added in 3.1
150             Name.starts_with("pmax") ||       // Added in 3.9
151             Name.starts_with("pmin") ||       // Added in 3.9
152             Name.starts_with("pmovsx") ||     // Added in 3.9
153             Name.starts_with("pmovzx") ||     // Added in 3.9
154             Name == "pmul.dq" ||              // Added in 7.0
155             Name == "pmulu.dq" ||             // Added in 7.0
156             Name.starts_with("psll.dq") ||    // Added in 3.7
157             Name.starts_with("psrl.dq") ||    // Added in 3.7
158             Name.starts_with("psubs.") ||     // Added in 8.0
159             Name.starts_with("psubus.") ||    // Added in 8.0
160             Name.starts_with("vbroadcast") || // Added in 3.8
161             Name == "vbroadcasti128" ||       // Added in 3.7
162             Name == "vextracti128" ||         // Added in 3.7
163             Name == "vinserti128" ||          // Added in 3.7
164             Name == "vperm2i128");            // Added in 6.0
165 
166   if (Name.consume_front("avx512.")) {
167     if (Name.consume_front("mask."))
168       // 'avx512.mask.*'
169       return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
170               Name.starts_with("and.") ||        // Added in 3.9
171               Name.starts_with("andn.") ||       // Added in 3.9
172               Name.starts_with("broadcast.s") || // Added in 3.9
173               Name.starts_with("broadcastf32x4.") || // Added in 6.0
174               Name.starts_with("broadcastf32x8.") || // Added in 6.0
175               Name.starts_with("broadcastf64x2.") || // Added in 6.0
176               Name.starts_with("broadcastf64x4.") || // Added in 6.0
177               Name.starts_with("broadcasti32x4.") || // Added in 6.0
178               Name.starts_with("broadcasti32x8.") || // Added in 6.0
179               Name.starts_with("broadcasti64x2.") || // Added in 6.0
180               Name.starts_with("broadcasti64x4.") || // Added in 6.0
181               Name.starts_with("cmp.b") ||           // Added in 5.0
182               Name.starts_with("cmp.d") ||           // Added in 5.0
183               Name.starts_with("cmp.q") ||           // Added in 5.0
184               Name.starts_with("cmp.w") ||           // Added in 5.0
185               Name.starts_with("compress.b") ||      // Added in 9.0
186               Name.starts_with("compress.d") ||      // Added in 9.0
187               Name.starts_with("compress.p") ||      // Added in 9.0
188               Name.starts_with("compress.q") ||      // Added in 9.0
189               Name.starts_with("compress.store.") || // Added in 7.0
190               Name.starts_with("compress.w") ||      // Added in 9.0
191               Name.starts_with("conflict.") ||       // Added in 9.0
192               Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
193               Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
194               Name == "cvtpd2dq.256" ||              // Added in 7.0
195               Name == "cvtpd2ps.256" ||              // Added in 7.0
196               Name == "cvtps2pd.128" ||              // Added in 7.0
197               Name == "cvtps2pd.256" ||              // Added in 7.0
198               Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
199               Name == "cvtqq2ps.256" ||              // Added in 9.0
200               Name == "cvtqq2ps.512" ||              // Added in 9.0
201               Name == "cvttpd2dq.256" ||             // Added in 7.0
202               Name == "cvttps2dq.128" ||             // Added in 7.0
203               Name == "cvttps2dq.256" ||             // Added in 7.0
204               Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
205               Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
206               Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
207               Name == "cvtuqq2ps.256" ||             // Added in 9.0
208               Name == "cvtuqq2ps.512" ||             // Added in 9.0
209               Name.starts_with("dbpsadbw.") ||       // Added in 7.0
210               Name.starts_with("div.p") ||    // Added in 7.0. 128/256 in 4.0
211               Name.starts_with("expand.b") || // Added in 9.0
212               Name.starts_with("expand.d") || // Added in 9.0
213               Name.starts_with("expand.load.") || // Added in 7.0
214               Name.starts_with("expand.p") ||     // Added in 9.0
215               Name.starts_with("expand.q") ||     // Added in 9.0
216               Name.starts_with("expand.w") ||     // Added in 9.0
217               Name.starts_with("fpclass.p") ||    // Added in 7.0
218               Name.starts_with("insert") ||       // Added in 4.0
219               Name.starts_with("load.") ||        // Added in 3.9
220               Name.starts_with("loadu.") ||       // Added in 3.9
221               Name.starts_with("lzcnt.") ||       // Added in 5.0
222               Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
223               Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
224               Name.starts_with("movddup") ||     // Added in 3.9
225               Name.starts_with("move.s") ||      // Added in 4.0
226               Name.starts_with("movshdup") ||    // Added in 3.9
227               Name.starts_with("movsldup") ||    // Added in 3.9
228               Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
229               Name.starts_with("or.") ||         // Added in 3.9
230               Name.starts_with("pabs.") ||       // Added in 6.0
231               Name.starts_with("packssdw.") ||   // Added in 5.0
232               Name.starts_with("packsswb.") ||   // Added in 5.0
233               Name.starts_with("packusdw.") ||   // Added in 5.0
234               Name.starts_with("packuswb.") ||   // Added in 5.0
235               Name.starts_with("padd.") ||       // Added in 4.0
236               Name.starts_with("padds.") ||      // Added in 8.0
237               Name.starts_with("paddus.") ||     // Added in 8.0
238               Name.starts_with("palignr.") ||    // Added in 3.9
239               Name.starts_with("pand.") ||       // Added in 3.9
240               Name.starts_with("pandn.") ||      // Added in 3.9
241               Name.starts_with("pavg") ||        // Added in 6.0
242               Name.starts_with("pbroadcast") ||  // Added in 6.0
243               Name.starts_with("pcmpeq.") ||     // Added in 3.9
244               Name.starts_with("pcmpgt.") ||     // Added in 3.9
245               Name.starts_with("perm.df.") ||    // Added in 3.9
246               Name.starts_with("perm.di.") ||    // Added in 3.9
247               Name.starts_with("permvar.") ||    // Added in 7.0
248               Name.starts_with("pmaddubs.w.") || // Added in 7.0
249               Name.starts_with("pmaddw.d.") ||   // Added in 7.0
250               Name.starts_with("pmax") ||        // Added in 4.0
251               Name.starts_with("pmin") ||        // Added in 4.0
252               Name == "pmov.qd.256" ||           // Added in 9.0
253               Name == "pmov.qd.512" ||           // Added in 9.0
254               Name == "pmov.wb.256" ||           // Added in 9.0
255               Name == "pmov.wb.512" ||           // Added in 9.0
256               Name.starts_with("pmovsx") ||      // Added in 4.0
257               Name.starts_with("pmovzx") ||      // Added in 4.0
258               Name.starts_with("pmul.dq.") ||    // Added in 4.0
259               Name.starts_with("pmul.hr.sw.") || // Added in 7.0
260               Name.starts_with("pmulh.w.") ||    // Added in 7.0
261               Name.starts_with("pmulhu.w.") ||   // Added in 7.0
262               Name.starts_with("pmull.") ||      // Added in 4.0
263               Name.starts_with("pmultishift.qb.") || // Added in 8.0
264               Name.starts_with("pmulu.dq.") ||       // Added in 4.0
265               Name.starts_with("por.") ||            // Added in 3.9
266               Name.starts_with("prol.") ||           // Added in 8.0
267               Name.starts_with("prolv.") ||          // Added in 8.0
268               Name.starts_with("pror.") ||           // Added in 8.0
269               Name.starts_with("prorv.") ||          // Added in 8.0
270               Name.starts_with("pshuf.b.") ||        // Added in 4.0
271               Name.starts_with("pshuf.d.") ||        // Added in 3.9
272               Name.starts_with("pshufh.w.") ||       // Added in 3.9
273               Name.starts_with("pshufl.w.") ||       // Added in 3.9
274               Name.starts_with("psll.d") ||          // Added in 4.0
275               Name.starts_with("psll.q") ||          // Added in 4.0
276               Name.starts_with("psll.w") ||          // Added in 4.0
277               Name.starts_with("pslli") ||           // Added in 4.0
278               Name.starts_with("psllv") ||           // Added in 4.0
279               Name.starts_with("psra.d") ||          // Added in 4.0
280               Name.starts_with("psra.q") ||          // Added in 4.0
281               Name.starts_with("psra.w") ||          // Added in 4.0
282               Name.starts_with("psrai") ||           // Added in 4.0
283               Name.starts_with("psrav") ||           // Added in 4.0
284               Name.starts_with("psrl.d") ||          // Added in 4.0
285               Name.starts_with("psrl.q") ||          // Added in 4.0
286               Name.starts_with("psrl.w") ||          // Added in 4.0
287               Name.starts_with("psrli") ||           // Added in 4.0
288               Name.starts_with("psrlv") ||           // Added in 4.0
289               Name.starts_with("psub.") ||           // Added in 4.0
290               Name.starts_with("psubs.") ||          // Added in 8.0
291               Name.starts_with("psubus.") ||         // Added in 8.0
292               Name.starts_with("pternlog.") ||       // Added in 7.0
293               Name.starts_with("punpckh") ||         // Added in 3.9
294               Name.starts_with("punpckl") ||         // Added in 3.9
295               Name.starts_with("pxor.") ||           // Added in 3.9
296               Name.starts_with("shuf.f") ||          // Added in 6.0
297               Name.starts_with("shuf.i") ||          // Added in 6.0
298               Name.starts_with("shuf.p") ||          // Added in 4.0
299               Name.starts_with("sqrt.p") ||          // Added in 7.0
300               Name.starts_with("store.b.") ||        // Added in 3.9
301               Name.starts_with("store.d.") ||        // Added in 3.9
302               Name.starts_with("store.p") ||         // Added in 3.9
303               Name.starts_with("store.q.") ||        // Added in 3.9
304               Name.starts_with("store.w.") ||        // Added in 3.9
305               Name == "store.ss" ||                  // Added in 7.0
306               Name.starts_with("storeu.") ||         // Added in 3.9
307               Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
308               Name.starts_with("ucmp.") ||       // Added in 5.0
309               Name.starts_with("unpckh.") ||     // Added in 3.9
310               Name.starts_with("unpckl.") ||     // Added in 3.9
311               Name.starts_with("valign.") ||     // Added in 4.0
312               Name == "vcvtph2ps.128" ||         // Added in 11.0
313               Name == "vcvtph2ps.256" ||         // Added in 11.0
314               Name.starts_with("vextract") ||    // Added in 4.0
315               Name.starts_with("vfmadd.") ||     // Added in 7.0
316               Name.starts_with("vfmaddsub.") ||  // Added in 7.0
317               Name.starts_with("vfnmadd.") ||    // Added in 7.0
318               Name.starts_with("vfnmsub.") ||    // Added in 7.0
319               Name.starts_with("vpdpbusd.") ||   // Added in 7.0
320               Name.starts_with("vpdpbusds.") ||  // Added in 7.0
321               Name.starts_with("vpdpwssd.") ||   // Added in 7.0
322               Name.starts_with("vpdpwssds.") ||  // Added in 7.0
323               Name.starts_with("vpermi2var.") || // Added in 7.0
324               Name.starts_with("vpermil.p") ||   // Added in 3.9
325               Name.starts_with("vpermilvar.") || // Added in 4.0
326               Name.starts_with("vpermt2var.") || // Added in 7.0
327               Name.starts_with("vpmadd52") ||    // Added in 7.0
328               Name.starts_with("vpshld.") ||     // Added in 7.0
329               Name.starts_with("vpshldv.") ||    // Added in 8.0
330               Name.starts_with("vpshrd.") ||     // Added in 7.0
331               Name.starts_with("vpshrdv.") ||    // Added in 8.0
332               Name.starts_with("vpshufbitqmb.") || // Added in 8.0
333               Name.starts_with("xor."));           // Added in 3.9
334 
335     if (Name.consume_front("mask3."))
336       // 'avx512.mask3.*'
337       return (Name.starts_with("vfmadd.") ||    // Added in 7.0
338               Name.starts_with("vfmaddsub.") || // Added in 7.0
339               Name.starts_with("vfmsub.") ||    // Added in 7.0
340               Name.starts_with("vfmsubadd.") || // Added in 7.0
341               Name.starts_with("vfnmsub."));    // Added in 7.0
342 
343     if (Name.consume_front("maskz."))
344       // 'avx512.maskz.*'
345       return (Name.starts_with("pternlog.") ||   // Added in 7.0
346               Name.starts_with("vfmadd.") ||     // Added in 7.0
347               Name.starts_with("vfmaddsub.") ||  // Added in 7.0
348               Name.starts_with("vpdpbusd.") ||   // Added in 7.0
349               Name.starts_with("vpdpbusds.") ||  // Added in 7.0
350               Name.starts_with("vpdpwssd.") ||   // Added in 7.0
351               Name.starts_with("vpdpwssds.") ||  // Added in 7.0
352               Name.starts_with("vpermt2var.") || // Added in 7.0
353               Name.starts_with("vpmadd52") ||    // Added in 7.0
354               Name.starts_with("vpshldv.") ||    // Added in 8.0
355               Name.starts_with("vpshrdv."));     // Added in 8.0
356 
357     // 'avx512.*'
358     return (Name == "movntdqa" ||               // Added in 5.0
359             Name == "pmul.dq.512" ||            // Added in 7.0
360             Name == "pmulu.dq.512" ||           // Added in 7.0
361             Name.starts_with("broadcastm") ||   // Added in 6.0
362             Name.starts_with("cmp.p") ||        // Added in 12.0
363             Name.starts_with("cvtb2mask.") ||   // Added in 7.0
364             Name.starts_with("cvtd2mask.") ||   // Added in 7.0
365             Name.starts_with("cvtmask2") ||     // Added in 5.0
366             Name.starts_with("cvtq2mask.") ||   // Added in 7.0
367             Name == "cvtusi2sd" ||              // Added in 7.0
368             Name.starts_with("cvtw2mask.") ||   // Added in 7.0
369             Name == "kand.w" ||                 // Added in 7.0
370             Name == "kandn.w" ||                // Added in 7.0
371             Name == "knot.w" ||                 // Added in 7.0
372             Name == "kor.w" ||                  // Added in 7.0
373             Name == "kortestc.w" ||             // Added in 7.0
374             Name == "kortestz.w" ||             // Added in 7.0
375             Name.starts_with("kunpck") ||       // added in 6.0
376             Name == "kxnor.w" ||                // Added in 7.0
377             Name == "kxor.w" ||                 // Added in 7.0
378             Name.starts_with("padds.") ||       // Added in 8.0
379             Name.starts_with("pbroadcast") ||   // Added in 3.9
380             Name.starts_with("prol") ||         // Added in 8.0
381             Name.starts_with("pror") ||         // Added in 8.0
382             Name.starts_with("psll.dq") ||      // Added in 3.9
383             Name.starts_with("psrl.dq") ||      // Added in 3.9
384             Name.starts_with("psubs.") ||       // Added in 8.0
385             Name.starts_with("ptestm") ||       // Added in 6.0
386             Name.starts_with("ptestnm") ||      // Added in 6.0
387             Name.starts_with("storent.") ||     // Added in 3.9
388             Name.starts_with("vbroadcast.s") || // Added in 7.0
389             Name.starts_with("vpshld.") ||      // Added in 8.0
390             Name.starts_with("vpshrd."));       // Added in 8.0
391   }
392 
393   if (Name.consume_front("fma."))
394     return (Name.starts_with("vfmadd.") ||    // Added in 7.0
395             Name.starts_with("vfmsub.") ||    // Added in 7.0
396             Name.starts_with("vfmsubadd.") || // Added in 7.0
397             Name.starts_with("vfnmadd.") ||   // Added in 7.0
398             Name.starts_with("vfnmsub."));    // Added in 7.0
399 
400   if (Name.consume_front("fma4."))
401     return Name.starts_with("vfmadd.s"); // Added in 7.0
402 
403   if (Name.consume_front("sse."))
404     return (Name == "add.ss" ||            // Added in 4.0
405             Name == "cvtsi2ss" ||          // Added in 7.0
406             Name == "cvtsi642ss" ||        // Added in 7.0
407             Name == "div.ss" ||            // Added in 4.0
408             Name == "mul.ss" ||            // Added in 4.0
409             Name.starts_with("sqrt.p") ||  // Added in 7.0
410             Name == "sqrt.ss" ||           // Added in 7.0
411             Name.starts_with("storeu.") || // Added in 3.9
412             Name == "sub.ss");             // Added in 4.0
413 
414   if (Name.consume_front("sse2."))
415     return (Name == "add.sd" ||            // Added in 4.0
416             Name == "cvtdq2pd" ||          // Added in 3.9
417             Name == "cvtdq2ps" ||          // Added in 7.0
418             Name == "cvtps2pd" ||          // Added in 3.9
419             Name == "cvtsi2sd" ||          // Added in 7.0
420             Name == "cvtsi642sd" ||        // Added in 7.0
421             Name == "cvtss2sd" ||          // Added in 7.0
422             Name == "div.sd" ||            // Added in 4.0
423             Name == "mul.sd" ||            // Added in 4.0
424             Name.starts_with("padds.") ||  // Added in 8.0
425             Name.starts_with("paddus.") || // Added in 8.0
426             Name.starts_with("pcmpeq.") || // Added in 3.1
427             Name.starts_with("pcmpgt.") || // Added in 3.1
428             Name == "pmaxs.w" ||           // Added in 3.9
429             Name == "pmaxu.b" ||           // Added in 3.9
430             Name == "pmins.w" ||           // Added in 3.9
431             Name == "pminu.b" ||           // Added in 3.9
432             Name == "pmulu.dq" ||          // Added in 7.0
433             Name.starts_with("pshuf") ||   // Added in 3.9
434             Name.starts_with("psll.dq") || // Added in 3.7
435             Name.starts_with("psrl.dq") || // Added in 3.7
436             Name.starts_with("psubs.") ||  // Added in 8.0
437             Name.starts_with("psubus.") || // Added in 8.0
438             Name.starts_with("sqrt.p") ||  // Added in 7.0
439             Name == "sqrt.sd" ||           // Added in 7.0
440             Name == "storel.dq" ||         // Added in 3.9
441             Name.starts_with("storeu.") || // Added in 3.9
442             Name == "sub.sd");             // Added in 4.0
443 
444   if (Name.consume_front("sse41."))
445     return (Name.starts_with("blendp") || // Added in 3.7
446             Name == "movntdqa" ||         // Added in 5.0
447             Name == "pblendw" ||          // Added in 3.7
448             Name == "pmaxsb" ||           // Added in 3.9
449             Name == "pmaxsd" ||           // Added in 3.9
450             Name == "pmaxud" ||           // Added in 3.9
451             Name == "pmaxuw" ||           // Added in 3.9
452             Name == "pminsb" ||           // Added in 3.9
453             Name == "pminsd" ||           // Added in 3.9
454             Name == "pminud" ||           // Added in 3.9
455             Name == "pminuw" ||           // Added in 3.9
456             Name.starts_with("pmovsx") || // Added in 3.8
457             Name.starts_with("pmovzx") || // Added in 3.9
458             Name == "pmuldq");            // Added in 7.0
459 
460   if (Name.consume_front("sse42."))
461     return Name == "crc32.64.8"; // Added in 3.4
462 
463   if (Name.consume_front("sse4a."))
464     return Name.starts_with("movnt."); // Added in 3.9
465 
466   if (Name.consume_front("ssse3."))
467     return (Name == "pabs.b.128" || // Added in 6.0
468             Name == "pabs.d.128" || // Added in 6.0
469             Name == "pabs.w.128");  // Added in 6.0
470 
471   if (Name.consume_front("xop."))
472     return (Name == "vpcmov" ||          // Added in 3.8
473             Name == "vpcmov.256" ||      // Added in 5.0
474             Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
475             Name.starts_with("vprot"));  // Added in 8.0
476 
477   return (Name == "addcarry.u32" ||        // Added in 8.0
478           Name == "addcarry.u64" ||        // Added in 8.0
479           Name == "addcarryx.u32" ||       // Added in 8.0
480           Name == "addcarryx.u64" ||       // Added in 8.0
481           Name == "subborrow.u32" ||       // Added in 8.0
482           Name == "subborrow.u64" ||       // Added in 8.0
483           Name.starts_with("vcvtph2ps.")); // Added in 11.0
484 }
485 
upgradeX86IntrinsicFunction(Function * F,StringRef Name,Function * & NewFn)486 static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
487                                         Function *&NewFn) {
488   // Only handle intrinsics that start with "x86.".
489   if (!Name.consume_front("x86."))
490     return false;
491 
492   if (shouldUpgradeX86Intrinsic(F, Name)) {
493     NewFn = nullptr;
494     return true;
495   }
496 
497   if (Name == "rdtscp") { // Added in 8.0
498     // If this intrinsic has 0 operands, it's the new version.
499     if (F->getFunctionType()->getNumParams() == 0)
500       return false;
501 
502     rename(F);
503     NewFn = Intrinsic::getDeclaration(F->getParent(),
504                                       Intrinsic::x86_rdtscp);
505     return true;
506   }
507 
508   Intrinsic::ID ID;
509 
510   // SSE4.1 ptest functions may have an old signature.
511   if (Name.consume_front("sse41.ptest")) { // Added in 3.2
512     ID = StringSwitch<Intrinsic::ID>(Name)
513              .Case("c", Intrinsic::x86_sse41_ptestc)
514              .Case("z", Intrinsic::x86_sse41_ptestz)
515              .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
516              .Default(Intrinsic::not_intrinsic);
517     if (ID != Intrinsic::not_intrinsic)
518       return upgradePTESTIntrinsic(F, ID, NewFn);
519 
520     return false;
521   }
522 
523   // Several blend and other instructions with masks used the wrong number of
524   // bits.
525 
526   // Added in 3.6
527   ID = StringSwitch<Intrinsic::ID>(Name)
528            .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
529            .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
530            .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
531            .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
532            .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
533            .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
534            .Default(Intrinsic::not_intrinsic);
535   if (ID != Intrinsic::not_intrinsic)
536     return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
537 
538   if (Name.consume_front("avx512.mask.cmp.")) {
539     // Added in 7.0
540     ID = StringSwitch<Intrinsic::ID>(Name)
541              .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
542              .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
543              .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
544              .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
545              .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
546              .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
547              .Default(Intrinsic::not_intrinsic);
548     if (ID != Intrinsic::not_intrinsic)
549       return upgradeX86MaskedFPCompare(F, ID, NewFn);
550     return false; // No other 'x86.avx512.mask.cmp.*'.
551   }
552 
553   if (Name.consume_front("avx512bf16.")) {
554     // Added in 9.0
555     ID = StringSwitch<Intrinsic::ID>(Name)
556              .Case("cvtne2ps2bf16.128",
557                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
558              .Case("cvtne2ps2bf16.256",
559                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
560              .Case("cvtne2ps2bf16.512",
561                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
562              .Case("mask.cvtneps2bf16.128",
563                    Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
564              .Case("cvtneps2bf16.256",
565                    Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
566              .Case("cvtneps2bf16.512",
567                    Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
568              .Default(Intrinsic::not_intrinsic);
569     if (ID != Intrinsic::not_intrinsic)
570       return upgradeX86BF16Intrinsic(F, ID, NewFn);
571 
572     // Added in 9.0
573     ID = StringSwitch<Intrinsic::ID>(Name)
574              .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
575              .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
576              .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
577              .Default(Intrinsic::not_intrinsic);
578     if (ID != Intrinsic::not_intrinsic)
579       return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
580     return false; // No other 'x86.avx512bf16.*'.
581   }
582 
583   if (Name.consume_front("xop.")) {
584     Intrinsic::ID ID = Intrinsic::not_intrinsic;
585     if (Name.starts_with("vpermil2")) { // Added in 3.9
586       // Upgrade any XOP PERMIL2 index operand still using a float/double
587       // vector.
588       auto Idx = F->getFunctionType()->getParamType(2);
589       if (Idx->isFPOrFPVectorTy()) {
590         unsigned IdxSize = Idx->getPrimitiveSizeInBits();
591         unsigned EltSize = Idx->getScalarSizeInBits();
592         if (EltSize == 64 && IdxSize == 128)
593           ID = Intrinsic::x86_xop_vpermil2pd;
594         else if (EltSize == 32 && IdxSize == 128)
595           ID = Intrinsic::x86_xop_vpermil2ps;
596         else if (EltSize == 64 && IdxSize == 256)
597           ID = Intrinsic::x86_xop_vpermil2pd_256;
598         else
599           ID = Intrinsic::x86_xop_vpermil2ps_256;
600       }
601     } else if (F->arg_size() == 2)
602       // frcz.ss/sd may need to have an argument dropped. Added in 3.2
603       ID = StringSwitch<Intrinsic::ID>(Name)
604                .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
605                .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
606                .Default(Intrinsic::not_intrinsic);
607 
608     if (ID != Intrinsic::not_intrinsic) {
609       rename(F);
610       NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
611       return true;
612     }
613     return false; // No other 'x86.xop.*'
614   }
615 
616   if (Name == "seh.recoverfp") {
617     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
618     return true;
619   }
620 
621   return false;
622 }
623 
624 // Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
625 // IsArm: 'arm.*', !IsArm: 'aarch64.*'.
upgradeArmOrAarch64IntrinsicFunction(bool IsArm,Function * F,StringRef Name,Function * & NewFn)626 static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
627                                                  StringRef Name,
628                                                  Function *&NewFn) {
629   if (Name.starts_with("rbit")) {
630     // '(arm|aarch64).rbit'.
631     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
632                                       F->arg_begin()->getType());
633     return true;
634   }
635 
636   if (Name == "thread.pointer") {
637     // '(arm|aarch64).thread.pointer'.
638     NewFn =
639         Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
640     return true;
641   }
642 
643   bool Neon = Name.consume_front("neon.");
644   if (Neon) {
645     // '(arm|aarch64).neon.*'.
646     // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
647     // v16i8 respectively.
648     if (Name.consume_front("bfdot.")) {
649       // (arm|aarch64).neon.bfdot.*'.
650       Intrinsic::ID ID =
651           StringSwitch<Intrinsic::ID>(Name)
652               .Cases("v2f32.v8i8", "v4f32.v16i8",
653                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
654                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
655               .Default(Intrinsic::not_intrinsic);
656       if (ID != Intrinsic::not_intrinsic) {
657         size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
658         assert((OperandWidth == 64 || OperandWidth == 128) &&
659                "Unexpected operand width");
660         LLVMContext &Ctx = F->getParent()->getContext();
661         std::array<Type *, 2> Tys{
662             {F->getReturnType(),
663              FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
664         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
665         return true;
666       }
667       return false; // No other '(arm|aarch64).neon.bfdot.*'.
668     }
669 
670     // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
671     // anymore and accept v8bf16 instead of v16i8.
672     if (Name.consume_front("bfm")) {
673       // (arm|aarch64).neon.bfm*'.
674       if (Name.consume_back(".v4f32.v16i8")) {
675         // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
676         Intrinsic::ID ID =
677             StringSwitch<Intrinsic::ID>(Name)
678                 .Case("mla",
679                       IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
680                             : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
681                 .Case("lalb",
682                       IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
683                             : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
684                 .Case("lalt",
685                       IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
686                             : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
687                 .Default(Intrinsic::not_intrinsic);
688         if (ID != Intrinsic::not_intrinsic) {
689           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
690           return true;
691         }
692         return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
693       }
694       return false; // No other '(arm|aarch64).neon.bfm*.
695     }
696     // Continue on to Aarch64 Neon or Arm Neon.
697   }
698   // Continue on to Arm or Aarch64.
699 
700   if (IsArm) {
701     // 'arm.*'.
702     if (Neon) {
703       // 'arm.neon.*'.
704       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
705                              .StartsWith("vclz.", Intrinsic::ctlz)
706                              .StartsWith("vcnt.", Intrinsic::ctpop)
707                              .StartsWith("vqadds.", Intrinsic::sadd_sat)
708                              .StartsWith("vqaddu.", Intrinsic::uadd_sat)
709                              .StartsWith("vqsubs.", Intrinsic::ssub_sat)
710                              .StartsWith("vqsubu.", Intrinsic::usub_sat)
711                              .Default(Intrinsic::not_intrinsic);
712       if (ID != Intrinsic::not_intrinsic) {
713         NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
714                                           F->arg_begin()->getType());
715         return true;
716       }
717 
718       if (Name.consume_front("vst")) {
719         // 'arm.neon.vst*'.
720         static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
721         SmallVector<StringRef, 2> Groups;
722         if (vstRegex.match(Name, &Groups)) {
723           static const Intrinsic::ID StoreInts[] = {
724               Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
725               Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
726 
727           static const Intrinsic::ID StoreLaneInts[] = {
728               Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
729               Intrinsic::arm_neon_vst4lane};
730 
731           auto fArgs = F->getFunctionType()->params();
732           Type *Tys[] = {fArgs[0], fArgs[1]};
733           if (Groups[1].size() == 1)
734             NewFn = Intrinsic::getDeclaration(F->getParent(),
735                                               StoreInts[fArgs.size() - 3], Tys);
736           else
737             NewFn = Intrinsic::getDeclaration(
738                 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
739           return true;
740         }
741         return false; // No other 'arm.neon.vst*'.
742       }
743 
744       return false; // No other 'arm.neon.*'.
745     }
746 
747     if (Name.consume_front("mve.")) {
748       // 'arm.mve.*'.
749       if (Name == "vctp64") {
750         if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
751           // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
752           // the function and deal with it below in UpgradeIntrinsicCall.
753           rename(F);
754           return true;
755         }
756         return false; // Not 'arm.mve.vctp64'.
757       }
758 
759       // These too are changed to accept a v2i1 instead of the old v4i1.
760       if (Name.consume_back(".v4i1")) {
761         // 'arm.mve.*.v4i1'.
762         if (Name.consume_back(".predicated.v2i64.v4i32"))
763           // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
764           return Name == "mull.int" || Name == "vqdmull";
765 
766         if (Name.consume_back(".v2i64")) {
767           // 'arm.mve.*.v2i64.v4i1'
768           bool IsGather = Name.consume_front("vldr.gather.");
769           if (IsGather || Name.consume_front("vstr.scatter.")) {
770             if (Name.consume_front("base.")) {
771               // Optional 'wb.' prefix.
772               Name.consume_front("wb.");
773               // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
774               // predicated.v2i64.v2i64.v4i1'.
775               return Name == "predicated.v2i64";
776             }
777 
778             if (Name.consume_front("offset.predicated."))
779               return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
780                      Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
781 
782             // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
783             return false;
784           }
785 
786           return false; // No other 'arm.mve.*.v2i64.v4i1'.
787         }
788         return false; // No other 'arm.mve.*.v4i1'.
789       }
790       return false; // No other 'arm.mve.*'.
791     }
792 
793     if (Name.consume_front("cde.vcx")) {
794       // 'arm.cde.vcx*'.
795       if (Name.consume_back(".predicated.v2i64.v4i1"))
796         // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
797         return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
798                Name == "3q" || Name == "3qa";
799 
800       return false; // No other 'arm.cde.vcx*'.
801     }
802   } else {
803     // 'aarch64.*'.
804     if (Neon) {
805       // 'aarch64.neon.*'.
806       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
807                              .StartsWith("frintn", Intrinsic::roundeven)
808                              .StartsWith("rbit", Intrinsic::bitreverse)
809                              .Default(Intrinsic::not_intrinsic);
810       if (ID != Intrinsic::not_intrinsic) {
811         NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
812                                           F->arg_begin()->getType());
813         return true;
814       }
815 
816       if (Name.starts_with("addp")) {
817         // 'aarch64.neon.addp*'.
818         if (F->arg_size() != 2)
819           return false; // Invalid IR.
820         VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
821         if (Ty && Ty->getElementType()->isFloatingPointTy()) {
822           NewFn = Intrinsic::getDeclaration(F->getParent(),
823                                             Intrinsic::aarch64_neon_faddp, Ty);
824           return true;
825         }
826       }
827       return false; // No other 'aarch64.neon.*'.
828     }
829     if (Name.consume_front("sve.")) {
830       // 'aarch64.sve.*'.
831       if (Name.consume_front("bf")) {
832         if (Name.consume_back(".lane")) {
833           // 'aarch64.sve.bf*.lane'.
834           Intrinsic::ID ID =
835               StringSwitch<Intrinsic::ID>(Name)
836                   .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
837                   .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
838                   .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
839                   .Default(Intrinsic::not_intrinsic);
840           if (ID != Intrinsic::not_intrinsic) {
841             NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
842             return true;
843           }
844           return false; // No other 'aarch64.sve.bf*.lane'.
845         }
846         return false; // No other 'aarch64.sve.bf*'.
847       }
848 
849       if (Name.consume_front("addqv")) {
850         // 'aarch64.sve.addqv'.
851         if (!F->getReturnType()->isFPOrFPVectorTy())
852           return false;
853 
854         auto Args = F->getFunctionType()->params();
855         Type *Tys[] = {F->getReturnType(), Args[1]};
856         NewFn = Intrinsic::getDeclaration(F->getParent(),
857                                           Intrinsic::aarch64_sve_faddqv, Tys);
858         return true;
859       }
860 
861       if (Name.consume_front("ld")) {
862         // 'aarch64.sve.ld*'.
863         static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
864         if (LdRegex.match(Name)) {
865           Type *ScalarTy =
866               cast<VectorType>(F->getReturnType())->getElementType();
867           ElementCount EC =
868               cast<VectorType>(F->arg_begin()->getType())->getElementCount();
869           Type *Ty = VectorType::get(ScalarTy, EC);
870           static const Intrinsic::ID LoadIDs[] = {
871               Intrinsic::aarch64_sve_ld2_sret,
872               Intrinsic::aarch64_sve_ld3_sret,
873               Intrinsic::aarch64_sve_ld4_sret,
874           };
875           NewFn = Intrinsic::getDeclaration(F->getParent(),
876                                             LoadIDs[Name[0] - '2'], Ty);
877           return true;
878         }
879         return false; // No other 'aarch64.sve.ld*'.
880       }
881 
882       if (Name.consume_front("tuple.")) {
883         // 'aarch64.sve.tuple.*'.
884         if (Name.starts_with("get")) {
885           // 'aarch64.sve.tuple.get*'.
886           Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
887           NewFn = Intrinsic::getDeclaration(F->getParent(),
888                                             Intrinsic::vector_extract, Tys);
889           return true;
890         }
891 
892         if (Name.starts_with("set")) {
893           // 'aarch64.sve.tuple.set*'.
894           auto Args = F->getFunctionType()->params();
895           Type *Tys[] = {Args[0], Args[2], Args[1]};
896           NewFn = Intrinsic::getDeclaration(F->getParent(),
897                                             Intrinsic::vector_insert, Tys);
898           return true;
899         }
900 
901         static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
902         if (CreateTupleRegex.match(Name)) {
903           // 'aarch64.sve.tuple.create*'.
904           auto Args = F->getFunctionType()->params();
905           Type *Tys[] = {F->getReturnType(), Args[1]};
906           NewFn = Intrinsic::getDeclaration(F->getParent(),
907                                             Intrinsic::vector_insert, Tys);
908           return true;
909         }
910         return false; // No other 'aarch64.sve.tuple.*'.
911       }
912       return false; // No other 'aarch64.sve.*'.
913     }
914   }
915   return false; // No other 'arm.*', 'aarch64.*'.
916 }
917 
shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)918 static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
919   if (Name.consume_front("abs."))
920     return StringSwitch<Intrinsic::ID>(Name)
921         .Case("bf16", Intrinsic::nvvm_abs_bf16)
922         .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
923         .Default(Intrinsic::not_intrinsic);
924 
925   if (Name.consume_front("fma.rn."))
926     return StringSwitch<Intrinsic::ID>(Name)
927         .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
928         .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
929         .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
930         .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
931         .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
932         .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
933         .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
934         .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
935         .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
936         .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
937         .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
938         .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
939         .Default(Intrinsic::not_intrinsic);
940 
941   if (Name.consume_front("fmax."))
942     return StringSwitch<Intrinsic::ID>(Name)
943         .Case("bf16", Intrinsic::nvvm_fmax_bf16)
944         .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
945         .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
946         .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
947         .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
948         .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
949         .Case("ftz.nan.xorsign.abs.bf16",
950               Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
951         .Case("ftz.nan.xorsign.abs.bf16x2",
952               Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
953         .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
954         .Case("ftz.xorsign.abs.bf16x2",
955               Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
956         .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
957         .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
958         .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
959         .Case("nan.xorsign.abs.bf16x2",
960               Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
961         .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
962         .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
963         .Default(Intrinsic::not_intrinsic);
964 
965   if (Name.consume_front("fmin."))
966     return StringSwitch<Intrinsic::ID>(Name)
967         .Case("bf16", Intrinsic::nvvm_fmin_bf16)
968         .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
969         .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
970         .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
971         .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
972         .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
973         .Case("ftz.nan.xorsign.abs.bf16",
974               Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
975         .Case("ftz.nan.xorsign.abs.bf16x2",
976               Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
977         .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
978         .Case("ftz.xorsign.abs.bf16x2",
979               Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
980         .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
981         .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
982         .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
983         .Case("nan.xorsign.abs.bf16x2",
984               Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
985         .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
986         .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
987         .Default(Intrinsic::not_intrinsic);
988 
989   if (Name.consume_front("neg."))
990     return StringSwitch<Intrinsic::ID>(Name)
991         .Case("bf16", Intrinsic::nvvm_neg_bf16)
992         .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
993         .Default(Intrinsic::not_intrinsic);
994 
995   return Intrinsic::not_intrinsic;
996 }
997 
upgradeIntrinsicFunction1(Function * F,Function * & NewFn,bool CanUpgradeDebugIntrinsicsToRecords)998 static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
999                                       bool CanUpgradeDebugIntrinsicsToRecords) {
1000   assert(F && "Illegal to upgrade a non-existent Function.");
1001 
1002   StringRef Name = F->getName();
1003 
1004   // Quickly eliminate it, if it's not a candidate.
1005   if (!Name.consume_front("llvm.") || Name.empty())
1006     return false;
1007 
1008   switch (Name[0]) {
1009   default: break;
1010   case 'a': {
1011     bool IsArm = Name.consume_front("arm.");
1012     if (IsArm || Name.consume_front("aarch64.")) {
1013       if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1014         return true;
1015       break;
1016     }
1017 
1018     if (Name.consume_front("amdgcn.")) {
1019       if (Name == "alignbit") {
1020         // Target specific intrinsic became redundant
1021         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
1022                                           {F->getReturnType()});
1023         return true;
1024       }
1025 
1026       if (Name.consume_front("atomic.")) {
1027         if (Name.starts_with("inc") || Name.starts_with("dec")) {
1028           // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1029           // there's no new declaration.
1030           NewFn = nullptr;
1031           return true;
1032         }
1033         break; // No other 'amdgcn.atomic.*'
1034       }
1035 
1036       if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
1037           Name.starts_with("ds.fmax")) {
1038         // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1039         // declaration.
1040         NewFn = nullptr;
1041         return true;
1042       }
1043 
1044       if (Name.starts_with("ldexp.")) {
1045         // Target specific intrinsic became redundant
1046         NewFn = Intrinsic::getDeclaration(
1047           F->getParent(), Intrinsic::ldexp,
1048           {F->getReturnType(), F->getArg(1)->getType()});
1049         return true;
1050       }
1051       break; // No other 'amdgcn.*'
1052     }
1053 
1054     break;
1055   }
1056   case 'c': {
1057     if (F->arg_size() == 1) {
1058       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1059                              .StartsWith("ctlz.", Intrinsic::ctlz)
1060                              .StartsWith("cttz.", Intrinsic::cttz)
1061                              .Default(Intrinsic::not_intrinsic);
1062       if (ID != Intrinsic::not_intrinsic) {
1063         rename(F);
1064         NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
1065                                           F->arg_begin()->getType());
1066         return true;
1067       }
1068     }
1069 
1070     if (F->arg_size() == 2 && Name == "coro.end") {
1071       rename(F);
1072       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
1073       return true;
1074     }
1075 
1076     break;
1077   }
1078   case 'd':
1079     if (Name.consume_front("dbg.")) {
1080       // Mark debug intrinsics for upgrade to new debug format.
1081       if (CanUpgradeDebugIntrinsicsToRecords &&
1082           F->getParent()->IsNewDbgInfoFormat) {
1083         if (Name == "addr" || Name == "value" || Name == "assign" ||
1084             Name == "declare" || Name == "label") {
1085           // There's no function to replace these with.
1086           NewFn = nullptr;
1087           // But we do want these to get upgraded.
1088           return true;
1089         }
1090       }
1091       // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1092       // converted to DbgVariableRecords later.
1093       if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1094         rename(F);
1095         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
1096         return true;
1097       }
1098       break; // No other 'dbg.*'.
1099     }
1100     break;
1101   case 'e':
1102     if (Name.consume_front("experimental.vector.")) {
1103       Intrinsic::ID ID =
1104           StringSwitch<Intrinsic::ID>(Name)
1105               .StartsWith("extract.", Intrinsic::vector_extract)
1106               .StartsWith("insert.", Intrinsic::vector_insert)
1107               .StartsWith("splice.", Intrinsic::vector_splice)
1108               .StartsWith("reverse.", Intrinsic::vector_reverse)
1109               .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1110               .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1111               .Default(Intrinsic::not_intrinsic);
1112       if (ID != Intrinsic::not_intrinsic) {
1113         const auto *FT = F->getFunctionType();
1114         SmallVector<Type *, 2> Tys;
1115         if (ID == Intrinsic::vector_extract ||
1116             ID == Intrinsic::vector_interleave2)
1117           // Extracting overloads the return type.
1118           Tys.push_back(FT->getReturnType());
1119         if (ID != Intrinsic::vector_interleave2)
1120           Tys.push_back(FT->getParamType(0));
1121         if (ID == Intrinsic::vector_insert)
1122           // Inserting overloads the inserted type.
1123           Tys.push_back(FT->getParamType(1));
1124         rename(F);
1125         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1126         return true;
1127       }
1128 
1129       if (Name.consume_front("reduce.")) {
1130         SmallVector<StringRef, 2> Groups;
1131         static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1132         if (R.match(Name, &Groups))
1133           ID = StringSwitch<Intrinsic::ID>(Groups[1])
1134                    .Case("add", Intrinsic::vector_reduce_add)
1135                    .Case("mul", Intrinsic::vector_reduce_mul)
1136                    .Case("and", Intrinsic::vector_reduce_and)
1137                    .Case("or", Intrinsic::vector_reduce_or)
1138                    .Case("xor", Intrinsic::vector_reduce_xor)
1139                    .Case("smax", Intrinsic::vector_reduce_smax)
1140                    .Case("smin", Intrinsic::vector_reduce_smin)
1141                    .Case("umax", Intrinsic::vector_reduce_umax)
1142                    .Case("umin", Intrinsic::vector_reduce_umin)
1143                    .Case("fmax", Intrinsic::vector_reduce_fmax)
1144                    .Case("fmin", Intrinsic::vector_reduce_fmin)
1145                    .Default(Intrinsic::not_intrinsic);
1146 
1147         bool V2 = false;
1148         if (ID == Intrinsic::not_intrinsic) {
1149           static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1150           Groups.clear();
1151           V2 = true;
1152           if (R2.match(Name, &Groups))
1153             ID = StringSwitch<Intrinsic::ID>(Groups[1])
1154                      .Case("fadd", Intrinsic::vector_reduce_fadd)
1155                      .Case("fmul", Intrinsic::vector_reduce_fmul)
1156                      .Default(Intrinsic::not_intrinsic);
1157         }
1158         if (ID != Intrinsic::not_intrinsic) {
1159           rename(F);
1160           auto Args = F->getFunctionType()->params();
1161           NewFn =
1162               Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
1163           return true;
1164         }
1165         break; // No other 'expermental.vector.reduce.*'.
1166       }
1167       break; // No other 'experimental.vector.*'.
1168     }
1169     break; // No other 'e*'.
1170   case 'f':
1171     if (Name.starts_with("flt.rounds")) {
1172       rename(F);
1173       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1174       return true;
1175     }
1176     break;
1177   case 'i':
1178     if (Name.starts_with("invariant.group.barrier")) {
1179       // Rename invariant.group.barrier to launder.invariant.group
1180       auto Args = F->getFunctionType()->params();
1181       Type* ObjectPtr[1] = {Args[0]};
1182       rename(F);
1183       NewFn = Intrinsic::getDeclaration(F->getParent(),
1184           Intrinsic::launder_invariant_group, ObjectPtr);
1185       return true;
1186     }
1187     break;
1188   case 'm': {
1189     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1190     // alignment parameter to embedding the alignment as an attribute of
1191     // the pointer args.
1192     if (unsigned ID = StringSwitch<unsigned>(Name)
1193                           .StartsWith("memcpy.", Intrinsic::memcpy)
1194                           .StartsWith("memmove.", Intrinsic::memmove)
1195                           .Default(0)) {
1196       if (F->arg_size() == 5) {
1197         rename(F);
1198         // Get the types of dest, src, and len
1199         ArrayRef<Type *> ParamTypes =
1200             F->getFunctionType()->params().slice(0, 3);
1201         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
1202         return true;
1203       }
1204     }
1205     if (Name.starts_with("memset.") && F->arg_size() == 5) {
1206       rename(F);
1207       // Get the types of dest, and len
1208       const auto *FT = F->getFunctionType();
1209       Type *ParamTypes[2] = {
1210           FT->getParamType(0), // Dest
1211           FT->getParamType(2)  // len
1212       };
1213       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1214                                         ParamTypes);
1215       return true;
1216     }
1217     break;
1218   }
1219   case 'n': {
1220     if (Name.consume_front("nvvm.")) {
1221       // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1222       if (F->arg_size() == 1) {
1223         Intrinsic::ID IID =
1224             StringSwitch<Intrinsic::ID>(Name)
1225                 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1226                 .Case("clz.i", Intrinsic::ctlz)
1227                 .Case("popc.i", Intrinsic::ctpop)
1228                 .Default(Intrinsic::not_intrinsic);
1229         if (IID != Intrinsic::not_intrinsic) {
1230           NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1231                                             {F->getReturnType()});
1232           return true;
1233         }
1234       }
1235 
1236       // Check for nvvm intrinsics that need a return type adjustment.
1237       if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1238         Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1239         if (IID != Intrinsic::not_intrinsic) {
1240           NewFn = nullptr;
1241           return true;
1242         }
1243       }
1244 
1245       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1246       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
1247       //
1248       // TODO: We could add lohi.i2d.
1249       bool Expand = false;
1250       if (Name.consume_front("abs."))
1251         // nvvm.abs.{i,ii}
1252         Expand = Name == "i" || Name == "ll";
1253       else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1254         Expand = true;
1255       else if (Name.consume_front("max.") || Name.consume_front("min."))
1256         // nvvm.{min,max}.{i,ii,ui,ull}
1257         Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1258                  Name == "ui" || Name == "ull";
1259       else if (Name.consume_front("atomic.load.add."))
1260         // nvvm.atomic.load.add.{f32.p,f64.p}
1261         Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1262       else
1263         Expand = false;
1264 
1265       if (Expand) {
1266         NewFn = nullptr;
1267         return true;
1268       }
1269       break; // No other 'nvvm.*'.
1270     }
1271     break;
1272   }
1273   case 'o':
1274     // We only need to change the name to match the mangling including the
1275     // address space.
1276     if (Name.starts_with("objectsize.")) {
1277       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1278       if (F->arg_size() == 2 || F->arg_size() == 3 ||
1279           F->getName() !=
1280               Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1281         rename(F);
1282         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1283                                           Tys);
1284         return true;
1285       }
1286     }
1287     break;
1288 
1289   case 'p':
1290     if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1291       rename(F);
1292       NewFn = Intrinsic::getDeclaration(
1293           F->getParent(), Intrinsic::ptr_annotation,
1294           {F->arg_begin()->getType(), F->getArg(1)->getType()});
1295       return true;
1296     }
1297     break;
1298 
1299   case 'r': {
1300     if (Name.consume_front("riscv.")) {
1301       Intrinsic::ID ID;
1302       ID = StringSwitch<Intrinsic::ID>(Name)
1303                .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1304                .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1305                .Case("aes32esi", Intrinsic::riscv_aes32esi)
1306                .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1307                .Default(Intrinsic::not_intrinsic);
1308       if (ID != Intrinsic::not_intrinsic) {
1309         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1310           rename(F);
1311           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1312           return true;
1313         }
1314         break; // No other applicable upgrades.
1315       }
1316 
1317       ID = StringSwitch<Intrinsic::ID>(Name)
1318                .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1319                .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1320                .Default(Intrinsic::not_intrinsic);
1321       if (ID != Intrinsic::not_intrinsic) {
1322         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1323             F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1324           rename(F);
1325           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1326           return true;
1327         }
1328         break; // No other applicable upgrades.
1329       }
1330 
1331       ID = StringSwitch<Intrinsic::ID>(Name)
1332                .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1333                .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1334                .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1335                .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1336                .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1337                .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1338                .Default(Intrinsic::not_intrinsic);
1339       if (ID != Intrinsic::not_intrinsic) {
1340         if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1341           rename(F);
1342           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1343           return true;
1344         }
1345         break; // No other applicable upgrades.
1346       }
1347       break; // No other 'riscv.*' intrinsics
1348     }
1349   } break;
1350 
1351   case 's':
1352     if (Name == "stackprotectorcheck") {
1353       NewFn = nullptr;
1354       return true;
1355     }
1356     break;
1357 
1358   case 'v': {
1359     if (Name == "var.annotation" && F->arg_size() == 4) {
1360       rename(F);
1361       NewFn = Intrinsic::getDeclaration(
1362           F->getParent(), Intrinsic::var_annotation,
1363           {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1364       return true;
1365     }
1366     break;
1367   }
1368 
1369   case 'w':
1370     if (Name.consume_front("wasm.")) {
1371       Intrinsic::ID ID =
1372           StringSwitch<Intrinsic::ID>(Name)
1373               .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1374               .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1375               .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1376               .Default(Intrinsic::not_intrinsic);
1377       if (ID != Intrinsic::not_intrinsic) {
1378         rename(F);
1379         NewFn =
1380             Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1381         return true;
1382       }
1383 
1384       if (Name.consume_front("dot.i8x16.i7x16.")) {
1385         ID = StringSwitch<Intrinsic::ID>(Name)
1386                  .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1387                  .Case("add.signed",
1388                        Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1389                  .Default(Intrinsic::not_intrinsic);
1390         if (ID != Intrinsic::not_intrinsic) {
1391           rename(F);
1392           NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1393           return true;
1394         }
1395         break; // No other 'wasm.dot.i8x16.i7x16.*'.
1396       }
1397       break; // No other 'wasm.*'.
1398     }
1399     break;
1400 
1401   case 'x':
1402     if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1403       return true;
1404   }
1405 
1406   auto *ST = dyn_cast<StructType>(F->getReturnType());
1407   if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1408       F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1409     // Replace return type with literal non-packed struct. Only do this for
1410     // intrinsics declared to return a struct, not for intrinsics with
1411     // overloaded return type, in which case the exact struct type will be
1412     // mangled into the name.
1413     SmallVector<Intrinsic::IITDescriptor> Desc;
1414     Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1415     if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1416       auto *FT = F->getFunctionType();
1417       auto *NewST = StructType::get(ST->getContext(), ST->elements());
1418       auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1419       std::string Name = F->getName().str();
1420       rename(F);
1421       NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1422                                Name, F->getParent());
1423 
1424       // The new function may also need remangling.
1425       if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1426         NewFn = *Result;
1427       return true;
1428     }
1429   }
1430 
1431   // Remangle our intrinsic since we upgrade the mangling
1432   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1433   if (Result != std::nullopt) {
1434     NewFn = *Result;
1435     return true;
1436   }
1437 
1438   //  This may not belong here. This function is effectively being overloaded
1439   //  to both detect an intrinsic which needs upgrading, and to provide the
1440   //  upgraded form of the intrinsic. We should perhaps have two separate
1441   //  functions for this.
1442   return false;
1443 }
1444 
UpgradeIntrinsicFunction(Function * F,Function * & NewFn,bool CanUpgradeDebugIntrinsicsToRecords)1445 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1446                                     bool CanUpgradeDebugIntrinsicsToRecords) {
1447   NewFn = nullptr;
1448   bool Upgraded =
1449       upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1450   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1451 
1452   // Upgrade intrinsic attributes.  This does not change the function.
1453   if (NewFn)
1454     F = NewFn;
1455   if (Intrinsic::ID id = F->getIntrinsicID())
1456     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1457   return Upgraded;
1458 }
1459 
UpgradeGlobalVariable(GlobalVariable * GV)1460 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1461   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1462                           GV->getName() == "llvm.global_dtors")) ||
1463       !GV->hasInitializer())
1464     return nullptr;
1465   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1466   if (!ATy)
1467     return nullptr;
1468   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1469   if (!STy || STy->getNumElements() != 2)
1470     return nullptr;
1471 
1472   LLVMContext &C = GV->getContext();
1473   IRBuilder<> IRB(C);
1474   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1475                                IRB.getPtrTy());
1476   Constant *Init = GV->getInitializer();
1477   unsigned N = Init->getNumOperands();
1478   std::vector<Constant *> NewCtors(N);
1479   for (unsigned i = 0; i != N; ++i) {
1480     auto Ctor = cast<Constant>(Init->getOperand(i));
1481     NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1482                                       Ctor->getAggregateElement(1),
1483                                       Constant::getNullValue(IRB.getPtrTy()));
1484   }
1485   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1486 
1487   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1488                             NewInit, GV->getName());
1489 }
1490 
1491 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1492 // to byte shuffles.
upgradeX86PSLLDQIntrinsics(IRBuilder<> & Builder,Value * Op,unsigned Shift)1493 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1494                                          unsigned Shift) {
1495   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1496   unsigned NumElts = ResultTy->getNumElements() * 8;
1497 
1498   // Bitcast from a 64-bit element type to a byte element type.
1499   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1500   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1501 
1502   // We'll be shuffling in zeroes.
1503   Value *Res = Constant::getNullValue(VecTy);
1504 
1505   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1506   // we'll just return the zero vector.
1507   if (Shift < 16) {
1508     int Idxs[64];
1509     // 256/512-bit version is split into 2/4 16-byte lanes.
1510     for (unsigned l = 0; l != NumElts; l += 16)
1511       for (unsigned i = 0; i != 16; ++i) {
1512         unsigned Idx = NumElts + i - Shift;
1513         if (Idx < NumElts)
1514           Idx -= NumElts - 16; // end of lane, switch operand.
1515         Idxs[l + i] = Idx + l;
1516       }
1517 
1518     Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1519   }
1520 
1521   // Bitcast back to a 64-bit element type.
1522   return Builder.CreateBitCast(Res, ResultTy, "cast");
1523 }
1524 
1525 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1526 // to byte shuffles.
upgradeX86PSRLDQIntrinsics(IRBuilder<> & Builder,Value * Op,unsigned Shift)1527 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1528                                          unsigned Shift) {
1529   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1530   unsigned NumElts = ResultTy->getNumElements() * 8;
1531 
1532   // Bitcast from a 64-bit element type to a byte element type.
1533   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1534   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1535 
1536   // We'll be shuffling in zeroes.
1537   Value *Res = Constant::getNullValue(VecTy);
1538 
1539   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1540   // we'll just return the zero vector.
1541   if (Shift < 16) {
1542     int Idxs[64];
1543     // 256/512-bit version is split into 2/4 16-byte lanes.
1544     for (unsigned l = 0; l != NumElts; l += 16)
1545       for (unsigned i = 0; i != 16; ++i) {
1546         unsigned Idx = i + Shift;
1547         if (Idx >= 16)
1548           Idx += NumElts - 16; // end of lane, switch operand.
1549         Idxs[l + i] = Idx + l;
1550       }
1551 
1552     Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1553   }
1554 
1555   // Bitcast back to a 64-bit element type.
1556   return Builder.CreateBitCast(Res, ResultTy, "cast");
1557 }
1558 
getX86MaskVec(IRBuilder<> & Builder,Value * Mask,unsigned NumElts)1559 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1560                             unsigned NumElts) {
1561   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1562   llvm::VectorType *MaskTy = FixedVectorType::get(
1563       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1564   Mask = Builder.CreateBitCast(Mask, MaskTy);
1565 
1566   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1567   // i8 and we need to extract down to the right number of elements.
1568   if (NumElts <= 4) {
1569     int Indices[4];
1570     for (unsigned i = 0; i != NumElts; ++i)
1571       Indices[i] = i;
1572     Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1573                                        "extract");
1574   }
1575 
1576   return Mask;
1577 }
1578 
emitX86Select(IRBuilder<> & Builder,Value * Mask,Value * Op0,Value * Op1)1579 static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1580                             Value *Op1) {
1581   // If the mask is all ones just emit the first operation.
1582   if (const auto *C = dyn_cast<Constant>(Mask))
1583     if (C->isAllOnesValue())
1584       return Op0;
1585 
1586   Mask = getX86MaskVec(Builder, Mask,
1587                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1588   return Builder.CreateSelect(Mask, Op0, Op1);
1589 }
1590 
emitX86ScalarSelect(IRBuilder<> & Builder,Value * Mask,Value * Op0,Value * Op1)1591 static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1592                                   Value *Op1) {
1593   // If the mask is all ones just emit the first operation.
1594   if (const auto *C = dyn_cast<Constant>(Mask))
1595     if (C->isAllOnesValue())
1596       return Op0;
1597 
1598   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1599                                       Mask->getType()->getIntegerBitWidth());
1600   Mask = Builder.CreateBitCast(Mask, MaskTy);
1601   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1602   return Builder.CreateSelect(Mask, Op0, Op1);
1603 }
1604 
1605 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1606 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1607 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
upgradeX86ALIGNIntrinsics(IRBuilder<> & Builder,Value * Op0,Value * Op1,Value * Shift,Value * Passthru,Value * Mask,bool IsVALIGN)1608 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1609                                         Value *Op1, Value *Shift,
1610                                         Value *Passthru, Value *Mask,
1611                                         bool IsVALIGN) {
1612   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1613 
1614   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1615   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1616   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1617   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1618 
1619   // Mask the immediate for VALIGN.
1620   if (IsVALIGN)
1621     ShiftVal &= (NumElts - 1);
1622 
1623   // If palignr is shifting the pair of vectors more than the size of two
1624   // lanes, emit zero.
1625   if (ShiftVal >= 32)
1626     return llvm::Constant::getNullValue(Op0->getType());
1627 
1628   // If palignr is shifting the pair of input vectors more than one lane,
1629   // but less than two lanes, convert to shifting in zeroes.
1630   if (ShiftVal > 16) {
1631     ShiftVal -= 16;
1632     Op1 = Op0;
1633     Op0 = llvm::Constant::getNullValue(Op0->getType());
1634   }
1635 
1636   int Indices[64];
1637   // 256-bit palignr operates on 128-bit lanes so we need to handle that
1638   for (unsigned l = 0; l < NumElts; l += 16) {
1639     for (unsigned i = 0; i != 16; ++i) {
1640       unsigned Idx = ShiftVal + i;
1641       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1642         Idx += NumElts - 16; // End of lane, switch operand.
1643       Indices[l + i] = Idx + l;
1644     }
1645   }
1646 
1647   Value *Align = Builder.CreateShuffleVector(
1648       Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1649 
1650   return emitX86Select(Builder, Mask, Align, Passthru);
1651 }
1652 
upgradeX86VPERMT2Intrinsics(IRBuilder<> & Builder,CallBase & CI,bool ZeroMask,bool IndexForm)1653 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1654                                           bool ZeroMask, bool IndexForm) {
1655   Type *Ty = CI.getType();
1656   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1657   unsigned EltWidth = Ty->getScalarSizeInBits();
1658   bool IsFloat = Ty->isFPOrFPVectorTy();
1659   Intrinsic::ID IID;
1660   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1661     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1662   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1663     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1664   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1665     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1666   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1667     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1668   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1669     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1670   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1671     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1672   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1673     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1674   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1675     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1676   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1677     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1678   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1679     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1680   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1681     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1682   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1683     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1684   else if (VecWidth == 128 && EltWidth == 16)
1685     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1686   else if (VecWidth == 256 && EltWidth == 16)
1687     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1688   else if (VecWidth == 512 && EltWidth == 16)
1689     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1690   else if (VecWidth == 128 && EltWidth == 8)
1691     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1692   else if (VecWidth == 256 && EltWidth == 8)
1693     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1694   else if (VecWidth == 512 && EltWidth == 8)
1695     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1696   else
1697     llvm_unreachable("Unexpected intrinsic");
1698 
1699   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1700                     CI.getArgOperand(2) };
1701 
1702   // If this isn't index form we need to swap operand 0 and 1.
1703   if (!IndexForm)
1704     std::swap(Args[0], Args[1]);
1705 
1706   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1707                                 Args);
1708   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1709                              : Builder.CreateBitCast(CI.getArgOperand(1),
1710                                                      Ty);
1711   return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1712 }
1713 
upgradeX86BinaryIntrinsics(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)1714 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1715                                          Intrinsic::ID IID) {
1716   Type *Ty = CI.getType();
1717   Value *Op0 = CI.getOperand(0);
1718   Value *Op1 = CI.getOperand(1);
1719   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1720   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1721 
1722   if (CI.arg_size() == 4) { // For masked intrinsics.
1723     Value *VecSrc = CI.getOperand(2);
1724     Value *Mask = CI.getOperand(3);
1725     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1726   }
1727   return Res;
1728 }
1729 
upgradeX86Rotate(IRBuilder<> & Builder,CallBase & CI,bool IsRotateRight)1730 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1731                                bool IsRotateRight) {
1732   Type *Ty = CI.getType();
1733   Value *Src = CI.getArgOperand(0);
1734   Value *Amt = CI.getArgOperand(1);
1735 
1736   // Amount may be scalar immediate, in which case create a splat vector.
1737   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1738   // we only care about the lowest log2 bits anyway.
1739   if (Amt->getType() != Ty) {
1740     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1741     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1742     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1743   }
1744 
1745   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1746   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1747   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1748 
1749   if (CI.arg_size() == 4) { // For masked intrinsics.
1750     Value *VecSrc = CI.getOperand(2);
1751     Value *Mask = CI.getOperand(3);
1752     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1753   }
1754   return Res;
1755 }
1756 
upgradeX86vpcom(IRBuilder<> & Builder,CallBase & CI,unsigned Imm,bool IsSigned)1757 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1758                               bool IsSigned) {
1759   Type *Ty = CI.getType();
1760   Value *LHS = CI.getArgOperand(0);
1761   Value *RHS = CI.getArgOperand(1);
1762 
1763   CmpInst::Predicate Pred;
1764   switch (Imm) {
1765   case 0x0:
1766     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1767     break;
1768   case 0x1:
1769     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1770     break;
1771   case 0x2:
1772     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1773     break;
1774   case 0x3:
1775     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1776     break;
1777   case 0x4:
1778     Pred = ICmpInst::ICMP_EQ;
1779     break;
1780   case 0x5:
1781     Pred = ICmpInst::ICMP_NE;
1782     break;
1783   case 0x6:
1784     return Constant::getNullValue(Ty); // FALSE
1785   case 0x7:
1786     return Constant::getAllOnesValue(Ty); // TRUE
1787   default:
1788     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1789   }
1790 
1791   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1792   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1793   return Ext;
1794 }
1795 
upgradeX86ConcatShift(IRBuilder<> & Builder,CallBase & CI,bool IsShiftRight,bool ZeroMask)1796 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1797                                     bool IsShiftRight, bool ZeroMask) {
1798   Type *Ty = CI.getType();
1799   Value *Op0 = CI.getArgOperand(0);
1800   Value *Op1 = CI.getArgOperand(1);
1801   Value *Amt = CI.getArgOperand(2);
1802 
1803   if (IsShiftRight)
1804     std::swap(Op0, Op1);
1805 
1806   // Amount may be scalar immediate, in which case create a splat vector.
1807   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1808   // we only care about the lowest log2 bits anyway.
1809   if (Amt->getType() != Ty) {
1810     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1811     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1812     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1813   }
1814 
1815   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1816   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1817   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1818 
1819   unsigned NumArgs = CI.arg_size();
1820   if (NumArgs >= 4) { // For masked intrinsics.
1821     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1822                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1823                                    CI.getArgOperand(0);
1824     Value *Mask = CI.getOperand(NumArgs - 1);
1825     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1826   }
1827   return Res;
1828 }
1829 
upgradeMaskedStore(IRBuilder<> & Builder,Value * Ptr,Value * Data,Value * Mask,bool Aligned)1830 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1831                                  Value *Mask, bool Aligned) {
1832   // Cast the pointer to the right type.
1833   Ptr = Builder.CreateBitCast(Ptr,
1834                               llvm::PointerType::getUnqual(Data->getType()));
1835   const Align Alignment =
1836       Aligned
1837           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1838           : Align(1);
1839 
1840   // If the mask is all ones just emit a regular store.
1841   if (const auto *C = dyn_cast<Constant>(Mask))
1842     if (C->isAllOnesValue())
1843       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1844 
1845   // Convert the mask from an integer type to a vector of i1.
1846   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1847   Mask = getX86MaskVec(Builder, Mask, NumElts);
1848   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1849 }
1850 
upgradeMaskedLoad(IRBuilder<> & Builder,Value * Ptr,Value * Passthru,Value * Mask,bool Aligned)1851 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1852                                 Value *Passthru, Value *Mask, bool Aligned) {
1853   Type *ValTy = Passthru->getType();
1854   // Cast the pointer to the right type.
1855   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1856   const Align Alignment =
1857       Aligned
1858           ? Align(
1859                 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1860                 8)
1861           : Align(1);
1862 
1863   // If the mask is all ones just emit a regular store.
1864   if (const auto *C = dyn_cast<Constant>(Mask))
1865     if (C->isAllOnesValue())
1866       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1867 
1868   // Convert the mask from an integer type to a vector of i1.
1869   unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1870   Mask = getX86MaskVec(Builder, Mask, NumElts);
1871   return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1872 }
1873 
upgradeAbs(IRBuilder<> & Builder,CallBase & CI)1874 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1875   Type *Ty = CI.getType();
1876   Value *Op0 = CI.getArgOperand(0);
1877   Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1878   Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1879   if (CI.arg_size() == 3)
1880     Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1881   return Res;
1882 }
1883 
upgradePMULDQ(IRBuilder<> & Builder,CallBase & CI,bool IsSigned)1884 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1885   Type *Ty = CI.getType();
1886 
1887   // Arguments have a vXi32 type so cast to vXi64.
1888   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1889   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1890 
1891   if (IsSigned) {
1892     // Shift left then arithmetic shift right.
1893     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1894     LHS = Builder.CreateShl(LHS, ShiftAmt);
1895     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1896     RHS = Builder.CreateShl(RHS, ShiftAmt);
1897     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1898   } else {
1899     // Clear the upper bits.
1900     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1901     LHS = Builder.CreateAnd(LHS, Mask);
1902     RHS = Builder.CreateAnd(RHS, Mask);
1903   }
1904 
1905   Value *Res = Builder.CreateMul(LHS, RHS);
1906 
1907   if (CI.arg_size() == 4)
1908     Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1909 
1910   return Res;
1911 }
1912 
1913 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
applyX86MaskOn1BitsVec(IRBuilder<> & Builder,Value * Vec,Value * Mask)1914 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1915                                      Value *Mask) {
1916   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1917   if (Mask) {
1918     const auto *C = dyn_cast<Constant>(Mask);
1919     if (!C || !C->isAllOnesValue())
1920       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1921   }
1922 
1923   if (NumElts < 8) {
1924     int Indices[8];
1925     for (unsigned i = 0; i != NumElts; ++i)
1926       Indices[i] = i;
1927     for (unsigned i = NumElts; i != 8; ++i)
1928       Indices[i] = NumElts + i % NumElts;
1929     Vec = Builder.CreateShuffleVector(Vec,
1930                                       Constant::getNullValue(Vec->getType()),
1931                                       Indices);
1932   }
1933   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1934 }
1935 
upgradeMaskedCompare(IRBuilder<> & Builder,CallBase & CI,unsigned CC,bool Signed)1936 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1937                                    unsigned CC, bool Signed) {
1938   Value *Op0 = CI.getArgOperand(0);
1939   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1940 
1941   Value *Cmp;
1942   if (CC == 3) {
1943     Cmp = Constant::getNullValue(
1944         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1945   } else if (CC == 7) {
1946     Cmp = Constant::getAllOnesValue(
1947         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1948   } else {
1949     ICmpInst::Predicate Pred;
1950     switch (CC) {
1951     default: llvm_unreachable("Unknown condition code");
1952     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1953     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1954     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1955     case 4: Pred = ICmpInst::ICMP_NE;  break;
1956     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1957     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1958     }
1959     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1960   }
1961 
1962   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1963 
1964   return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1965 }
1966 
1967 // Replace a masked intrinsic with an older unmasked intrinsic.
upgradeX86MaskedShift(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)1968 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1969                                     Intrinsic::ID IID) {
1970   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1971   Value *Rep = Builder.CreateCall(Intrin,
1972                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1973   return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1974 }
1975 
upgradeMaskedMove(IRBuilder<> & Builder,CallBase & CI)1976 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1977   Value* A = CI.getArgOperand(0);
1978   Value* B = CI.getArgOperand(1);
1979   Value* Src = CI.getArgOperand(2);
1980   Value* Mask = CI.getArgOperand(3);
1981 
1982   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1983   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1984   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1985   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1986   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1987   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1988 }
1989 
upgradeMaskToInt(IRBuilder<> & Builder,CallBase & CI)1990 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1991   Value* Op = CI.getArgOperand(0);
1992   Type* ReturnOp = CI.getType();
1993   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1994   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1995   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1996 }
1997 
1998 // Replace intrinsic with unmasked version and a select.
upgradeAVX512MaskToSelect(StringRef Name,IRBuilder<> & Builder,CallBase & CI,Value * & Rep)1999 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2000                                       CallBase &CI, Value *&Rep) {
2001   Name = Name.substr(12); // Remove avx512.mask.
2002 
2003   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2004   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2005   Intrinsic::ID IID;
2006   if (Name.starts_with("max.p")) {
2007     if (VecWidth == 128 && EltWidth == 32)
2008       IID = Intrinsic::x86_sse_max_ps;
2009     else if (VecWidth == 128 && EltWidth == 64)
2010       IID = Intrinsic::x86_sse2_max_pd;
2011     else if (VecWidth == 256 && EltWidth == 32)
2012       IID = Intrinsic::x86_avx_max_ps_256;
2013     else if (VecWidth == 256 && EltWidth == 64)
2014       IID = Intrinsic::x86_avx_max_pd_256;
2015     else
2016       llvm_unreachable("Unexpected intrinsic");
2017   } else if (Name.starts_with("min.p")) {
2018     if (VecWidth == 128 && EltWidth == 32)
2019       IID = Intrinsic::x86_sse_min_ps;
2020     else if (VecWidth == 128 && EltWidth == 64)
2021       IID = Intrinsic::x86_sse2_min_pd;
2022     else if (VecWidth == 256 && EltWidth == 32)
2023       IID = Intrinsic::x86_avx_min_ps_256;
2024     else if (VecWidth == 256 && EltWidth == 64)
2025       IID = Intrinsic::x86_avx_min_pd_256;
2026     else
2027       llvm_unreachable("Unexpected intrinsic");
2028   } else if (Name.starts_with("pshuf.b.")) {
2029     if (VecWidth == 128)
2030       IID = Intrinsic::x86_ssse3_pshuf_b_128;
2031     else if (VecWidth == 256)
2032       IID = Intrinsic::x86_avx2_pshuf_b;
2033     else if (VecWidth == 512)
2034       IID = Intrinsic::x86_avx512_pshuf_b_512;
2035     else
2036       llvm_unreachable("Unexpected intrinsic");
2037   } else if (Name.starts_with("pmul.hr.sw.")) {
2038     if (VecWidth == 128)
2039       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2040     else if (VecWidth == 256)
2041       IID = Intrinsic::x86_avx2_pmul_hr_sw;
2042     else if (VecWidth == 512)
2043       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2044     else
2045       llvm_unreachable("Unexpected intrinsic");
2046   } else if (Name.starts_with("pmulh.w.")) {
2047     if (VecWidth == 128)
2048       IID = Intrinsic::x86_sse2_pmulh_w;
2049     else if (VecWidth == 256)
2050       IID = Intrinsic::x86_avx2_pmulh_w;
2051     else if (VecWidth == 512)
2052       IID = Intrinsic::x86_avx512_pmulh_w_512;
2053     else
2054       llvm_unreachable("Unexpected intrinsic");
2055   } else if (Name.starts_with("pmulhu.w.")) {
2056     if (VecWidth == 128)
2057       IID = Intrinsic::x86_sse2_pmulhu_w;
2058     else if (VecWidth == 256)
2059       IID = Intrinsic::x86_avx2_pmulhu_w;
2060     else if (VecWidth == 512)
2061       IID = Intrinsic::x86_avx512_pmulhu_w_512;
2062     else
2063       llvm_unreachable("Unexpected intrinsic");
2064   } else if (Name.starts_with("pmaddw.d.")) {
2065     if (VecWidth == 128)
2066       IID = Intrinsic::x86_sse2_pmadd_wd;
2067     else if (VecWidth == 256)
2068       IID = Intrinsic::x86_avx2_pmadd_wd;
2069     else if (VecWidth == 512)
2070       IID = Intrinsic::x86_avx512_pmaddw_d_512;
2071     else
2072       llvm_unreachable("Unexpected intrinsic");
2073   } else if (Name.starts_with("pmaddubs.w.")) {
2074     if (VecWidth == 128)
2075       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2076     else if (VecWidth == 256)
2077       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2078     else if (VecWidth == 512)
2079       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2080     else
2081       llvm_unreachable("Unexpected intrinsic");
2082   } else if (Name.starts_with("packsswb.")) {
2083     if (VecWidth == 128)
2084       IID = Intrinsic::x86_sse2_packsswb_128;
2085     else if (VecWidth == 256)
2086       IID = Intrinsic::x86_avx2_packsswb;
2087     else if (VecWidth == 512)
2088       IID = Intrinsic::x86_avx512_packsswb_512;
2089     else
2090       llvm_unreachable("Unexpected intrinsic");
2091   } else if (Name.starts_with("packssdw.")) {
2092     if (VecWidth == 128)
2093       IID = Intrinsic::x86_sse2_packssdw_128;
2094     else if (VecWidth == 256)
2095       IID = Intrinsic::x86_avx2_packssdw;
2096     else if (VecWidth == 512)
2097       IID = Intrinsic::x86_avx512_packssdw_512;
2098     else
2099       llvm_unreachable("Unexpected intrinsic");
2100   } else if (Name.starts_with("packuswb.")) {
2101     if (VecWidth == 128)
2102       IID = Intrinsic::x86_sse2_packuswb_128;
2103     else if (VecWidth == 256)
2104       IID = Intrinsic::x86_avx2_packuswb;
2105     else if (VecWidth == 512)
2106       IID = Intrinsic::x86_avx512_packuswb_512;
2107     else
2108       llvm_unreachable("Unexpected intrinsic");
2109   } else if (Name.starts_with("packusdw.")) {
2110     if (VecWidth == 128)
2111       IID = Intrinsic::x86_sse41_packusdw;
2112     else if (VecWidth == 256)
2113       IID = Intrinsic::x86_avx2_packusdw;
2114     else if (VecWidth == 512)
2115       IID = Intrinsic::x86_avx512_packusdw_512;
2116     else
2117       llvm_unreachable("Unexpected intrinsic");
2118   } else if (Name.starts_with("vpermilvar.")) {
2119     if (VecWidth == 128 && EltWidth == 32)
2120       IID = Intrinsic::x86_avx_vpermilvar_ps;
2121     else if (VecWidth == 128 && EltWidth == 64)
2122       IID = Intrinsic::x86_avx_vpermilvar_pd;
2123     else if (VecWidth == 256 && EltWidth == 32)
2124       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2125     else if (VecWidth == 256 && EltWidth == 64)
2126       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2127     else if (VecWidth == 512 && EltWidth == 32)
2128       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2129     else if (VecWidth == 512 && EltWidth == 64)
2130       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2131     else
2132       llvm_unreachable("Unexpected intrinsic");
2133   } else if (Name == "cvtpd2dq.256") {
2134     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2135   } else if (Name == "cvtpd2ps.256") {
2136     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2137   } else if (Name == "cvttpd2dq.256") {
2138     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2139   } else if (Name == "cvttps2dq.128") {
2140     IID = Intrinsic::x86_sse2_cvttps2dq;
2141   } else if (Name == "cvttps2dq.256") {
2142     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2143   } else if (Name.starts_with("permvar.")) {
2144     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2145     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146       IID = Intrinsic::x86_avx2_permps;
2147     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148       IID = Intrinsic::x86_avx2_permd;
2149     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150       IID = Intrinsic::x86_avx512_permvar_df_256;
2151     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152       IID = Intrinsic::x86_avx512_permvar_di_256;
2153     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154       IID = Intrinsic::x86_avx512_permvar_sf_512;
2155     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156       IID = Intrinsic::x86_avx512_permvar_si_512;
2157     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158       IID = Intrinsic::x86_avx512_permvar_df_512;
2159     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160       IID = Intrinsic::x86_avx512_permvar_di_512;
2161     else if (VecWidth == 128 && EltWidth == 16)
2162       IID = Intrinsic::x86_avx512_permvar_hi_128;
2163     else if (VecWidth == 256 && EltWidth == 16)
2164       IID = Intrinsic::x86_avx512_permvar_hi_256;
2165     else if (VecWidth == 512 && EltWidth == 16)
2166       IID = Intrinsic::x86_avx512_permvar_hi_512;
2167     else if (VecWidth == 128 && EltWidth == 8)
2168       IID = Intrinsic::x86_avx512_permvar_qi_128;
2169     else if (VecWidth == 256 && EltWidth == 8)
2170       IID = Intrinsic::x86_avx512_permvar_qi_256;
2171     else if (VecWidth == 512 && EltWidth == 8)
2172       IID = Intrinsic::x86_avx512_permvar_qi_512;
2173     else
2174       llvm_unreachable("Unexpected intrinsic");
2175   } else if (Name.starts_with("dbpsadbw.")) {
2176     if (VecWidth == 128)
2177       IID = Intrinsic::x86_avx512_dbpsadbw_128;
2178     else if (VecWidth == 256)
2179       IID = Intrinsic::x86_avx512_dbpsadbw_256;
2180     else if (VecWidth == 512)
2181       IID = Intrinsic::x86_avx512_dbpsadbw_512;
2182     else
2183       llvm_unreachable("Unexpected intrinsic");
2184   } else if (Name.starts_with("pmultishift.qb.")) {
2185     if (VecWidth == 128)
2186       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2187     else if (VecWidth == 256)
2188       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2189     else if (VecWidth == 512)
2190       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2191     else
2192       llvm_unreachable("Unexpected intrinsic");
2193   } else if (Name.starts_with("conflict.")) {
2194     if (Name[9] == 'd' && VecWidth == 128)
2195       IID = Intrinsic::x86_avx512_conflict_d_128;
2196     else if (Name[9] == 'd' && VecWidth == 256)
2197       IID = Intrinsic::x86_avx512_conflict_d_256;
2198     else if (Name[9] == 'd' && VecWidth == 512)
2199       IID = Intrinsic::x86_avx512_conflict_d_512;
2200     else if (Name[9] == 'q' && VecWidth == 128)
2201       IID = Intrinsic::x86_avx512_conflict_q_128;
2202     else if (Name[9] == 'q' && VecWidth == 256)
2203       IID = Intrinsic::x86_avx512_conflict_q_256;
2204     else if (Name[9] == 'q' && VecWidth == 512)
2205       IID = Intrinsic::x86_avx512_conflict_q_512;
2206     else
2207       llvm_unreachable("Unexpected intrinsic");
2208   } else if (Name.starts_with("pavg.")) {
2209     if (Name[5] == 'b' && VecWidth == 128)
2210       IID = Intrinsic::x86_sse2_pavg_b;
2211     else if (Name[5] == 'b' && VecWidth == 256)
2212       IID = Intrinsic::x86_avx2_pavg_b;
2213     else if (Name[5] == 'b' && VecWidth == 512)
2214       IID = Intrinsic::x86_avx512_pavg_b_512;
2215     else if (Name[5] == 'w' && VecWidth == 128)
2216       IID = Intrinsic::x86_sse2_pavg_w;
2217     else if (Name[5] == 'w' && VecWidth == 256)
2218       IID = Intrinsic::x86_avx2_pavg_w;
2219     else if (Name[5] == 'w' && VecWidth == 512)
2220       IID = Intrinsic::x86_avx512_pavg_w_512;
2221     else
2222       llvm_unreachable("Unexpected intrinsic");
2223   } else
2224     return false;
2225 
2226   SmallVector<Value *, 4> Args(CI.args());
2227   Args.pop_back();
2228   Args.pop_back();
2229   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2230                            Args);
2231   unsigned NumArgs = CI.arg_size();
2232   Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2233                       CI.getArgOperand(NumArgs - 2));
2234   return true;
2235 }
2236 
2237 /// Upgrade comment in call to inline asm that represents an objc retain release
2238 /// marker.
UpgradeInlineAsmString(std::string * AsmStr)2239 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2240   size_t Pos;
2241   if (AsmStr->find("mov\tfp") == 0 &&
2242       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2243       (Pos = AsmStr->find("# marker")) != std::string::npos) {
2244     AsmStr->replace(Pos, 1, ";");
2245   }
2246 }
2247 
upgradeX86IntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)2248 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2249                                       IRBuilder<> &Builder) {
2250   LLVMContext &C = F->getContext();
2251   Value *Rep = nullptr;
2252 
2253   if (Name.starts_with("sse4a.movnt.")) {
2254     SmallVector<Metadata *, 1> Elts;
2255     Elts.push_back(
2256         ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2257     MDNode *Node = MDNode::get(C, Elts);
2258 
2259     Value *Arg0 = CI->getArgOperand(0);
2260     Value *Arg1 = CI->getArgOperand(1);
2261 
2262     // Nontemporal (unaligned) store of the 0'th element of the float/double
2263     // vector.
2264     Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2265     PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2266     Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2267     Value *Extract =
2268         Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2269 
2270     StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2271     SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2272   } else if (Name.starts_with("avx.movnt.") ||
2273              Name.starts_with("avx512.storent.")) {
2274     SmallVector<Metadata *, 1> Elts;
2275     Elts.push_back(
2276         ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2277     MDNode *Node = MDNode::get(C, Elts);
2278 
2279     Value *Arg0 = CI->getArgOperand(0);
2280     Value *Arg1 = CI->getArgOperand(1);
2281 
2282     // Convert the type of the pointer to a pointer to the stored type.
2283     Value *BC = Builder.CreateBitCast(
2284         Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
2285     StoreInst *SI = Builder.CreateAlignedStore(
2286         Arg1, BC,
2287         Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2288     SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2289   } else if (Name == "sse2.storel.dq") {
2290     Value *Arg0 = CI->getArgOperand(0);
2291     Value *Arg1 = CI->getArgOperand(1);
2292 
2293     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2294     Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2295     Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2296     Value *BC = Builder.CreateBitCast(
2297         Arg0, PointerType::getUnqual(Elt->getType()), "cast");
2298     Builder.CreateAlignedStore(Elt, BC, Align(1));
2299   } else if (Name.starts_with("sse.storeu.") ||
2300              Name.starts_with("sse2.storeu.") ||
2301              Name.starts_with("avx.storeu.")) {
2302     Value *Arg0 = CI->getArgOperand(0);
2303     Value *Arg1 = CI->getArgOperand(1);
2304 
2305     Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
2306                                  "cast");
2307     Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2308   } else if (Name == "avx512.mask.store.ss") {
2309     Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2310     upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2311                        Mask, false);
2312   } else if (Name.starts_with("avx512.mask.store")) {
2313     // "avx512.mask.storeu." or "avx512.mask.store."
2314     bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2315     upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2316                        CI->getArgOperand(2), Aligned);
2317   } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2318     // Upgrade packed integer vector compare intrinsics to compare instructions.
2319     // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2320     bool CmpEq = Name[9] == 'e';
2321     Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2322                              CI->getArgOperand(0), CI->getArgOperand(1));
2323     Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2324   } else if (Name.starts_with("avx512.broadcastm")) {
2325     Type *ExtTy = Type::getInt32Ty(C);
2326     if (CI->getOperand(0)->getType()->isIntegerTy(8))
2327       ExtTy = Type::getInt64Ty(C);
2328     unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2329                        ExtTy->getPrimitiveSizeInBits();
2330     Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2331     Rep = Builder.CreateVectorSplat(NumElts, Rep);
2332   } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2333     Value *Vec = CI->getArgOperand(0);
2334     Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2335     Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,
2336                                                Elt0->getType());
2337     Elt0 = Builder.CreateCall(Intr, Elt0);
2338     Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2339   } else if (Name.starts_with("avx.sqrt.p") ||
2340              Name.starts_with("sse2.sqrt.p") ||
2341              Name.starts_with("sse.sqrt.p")) {
2342     Rep =
2343         Builder.CreateCall(Intrinsic::getDeclaration(
2344                                F->getParent(), Intrinsic::sqrt, CI->getType()),
2345                            {CI->getArgOperand(0)});
2346   } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2347     if (CI->arg_size() == 4 &&
2348         (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2349          cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2350       Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2351                                           : Intrinsic::x86_avx512_sqrt_pd_512;
2352 
2353       Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2354       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
2355                                Args);
2356     } else {
2357       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2358                                                          Intrinsic::sqrt,
2359                                                          CI->getType()),
2360                                {CI->getArgOperand(0)});
2361     }
2362     Rep =
2363         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2364   } else if (Name.starts_with("avx512.ptestm") ||
2365              Name.starts_with("avx512.ptestnm")) {
2366     Value *Op0 = CI->getArgOperand(0);
2367     Value *Op1 = CI->getArgOperand(1);
2368     Value *Mask = CI->getArgOperand(2);
2369     Rep = Builder.CreateAnd(Op0, Op1);
2370     llvm::Type *Ty = Op0->getType();
2371     Value *Zero = llvm::Constant::getNullValue(Ty);
2372     ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2373                                    ? ICmpInst::ICMP_NE
2374                                    : ICmpInst::ICMP_EQ;
2375     Rep = Builder.CreateICmp(Pred, Rep, Zero);
2376     Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2377   } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2378     unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2379                            ->getNumElements();
2380     Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2381     Rep =
2382         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2383   } else if (Name.starts_with("avx512.kunpck")) {
2384     unsigned NumElts = CI->getType()->getScalarSizeInBits();
2385     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2386     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2387     int Indices[64];
2388     for (unsigned i = 0; i != NumElts; ++i)
2389       Indices[i] = i;
2390 
2391     // First extract half of each vector. This gives better codegen than
2392     // doing it in a single shuffle.
2393     LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2394     RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2395     // Concat the vectors.
2396     // NOTE: Operands have to be swapped to match intrinsic definition.
2397     Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2398     Rep = Builder.CreateBitCast(Rep, CI->getType());
2399   } else if (Name == "avx512.kand.w") {
2400     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2401     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2402     Rep = Builder.CreateAnd(LHS, RHS);
2403     Rep = Builder.CreateBitCast(Rep, CI->getType());
2404   } else if (Name == "avx512.kandn.w") {
2405     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2406     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2407     LHS = Builder.CreateNot(LHS);
2408     Rep = Builder.CreateAnd(LHS, RHS);
2409     Rep = Builder.CreateBitCast(Rep, CI->getType());
2410   } else if (Name == "avx512.kor.w") {
2411     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2412     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2413     Rep = Builder.CreateOr(LHS, RHS);
2414     Rep = Builder.CreateBitCast(Rep, CI->getType());
2415   } else if (Name == "avx512.kxor.w") {
2416     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2417     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2418     Rep = Builder.CreateXor(LHS, RHS);
2419     Rep = Builder.CreateBitCast(Rep, CI->getType());
2420   } else if (Name == "avx512.kxnor.w") {
2421     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2422     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2423     LHS = Builder.CreateNot(LHS);
2424     Rep = Builder.CreateXor(LHS, RHS);
2425     Rep = Builder.CreateBitCast(Rep, CI->getType());
2426   } else if (Name == "avx512.knot.w") {
2427     Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2428     Rep = Builder.CreateNot(Rep);
2429     Rep = Builder.CreateBitCast(Rep, CI->getType());
2430   } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2431     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2432     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2433     Rep = Builder.CreateOr(LHS, RHS);
2434     Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2435     Value *C;
2436     if (Name[14] == 'c')
2437       C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2438     else
2439       C = ConstantInt::getNullValue(Builder.getInt16Ty());
2440     Rep = Builder.CreateICmpEQ(Rep, C);
2441     Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2442   } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2443              Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2444              Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2445              Name == "sse.div.ss" || Name == "sse2.div.sd") {
2446     Type *I32Ty = Type::getInt32Ty(C);
2447     Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2448                                                ConstantInt::get(I32Ty, 0));
2449     Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2450                                                ConstantInt::get(I32Ty, 0));
2451     Value *EltOp;
2452     if (Name.contains(".add."))
2453       EltOp = Builder.CreateFAdd(Elt0, Elt1);
2454     else if (Name.contains(".sub."))
2455       EltOp = Builder.CreateFSub(Elt0, Elt1);
2456     else if (Name.contains(".mul."))
2457       EltOp = Builder.CreateFMul(Elt0, Elt1);
2458     else
2459       EltOp = Builder.CreateFDiv(Elt0, Elt1);
2460     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2461                                       ConstantInt::get(I32Ty, 0));
2462   } else if (Name.starts_with("avx512.mask.pcmp")) {
2463     // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2464     bool CmpEq = Name[16] == 'e';
2465     Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2466   } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2467     Type *OpTy = CI->getArgOperand(0)->getType();
2468     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2469     Intrinsic::ID IID;
2470     switch (VecWidth) {
2471     default:
2472       llvm_unreachable("Unexpected intrinsic");
2473     case 128:
2474       IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2475       break;
2476     case 256:
2477       IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2478       break;
2479     case 512:
2480       IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2481       break;
2482     }
2483 
2484     Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2485                              {CI->getOperand(0), CI->getArgOperand(1)});
2486     Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2487   } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2488     Type *OpTy = CI->getArgOperand(0)->getType();
2489     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2490     unsigned EltWidth = OpTy->getScalarSizeInBits();
2491     Intrinsic::ID IID;
2492     if (VecWidth == 128 && EltWidth == 32)
2493       IID = Intrinsic::x86_avx512_fpclass_ps_128;
2494     else if (VecWidth == 256 && EltWidth == 32)
2495       IID = Intrinsic::x86_avx512_fpclass_ps_256;
2496     else if (VecWidth == 512 && EltWidth == 32)
2497       IID = Intrinsic::x86_avx512_fpclass_ps_512;
2498     else if (VecWidth == 128 && EltWidth == 64)
2499       IID = Intrinsic::x86_avx512_fpclass_pd_128;
2500     else if (VecWidth == 256 && EltWidth == 64)
2501       IID = Intrinsic::x86_avx512_fpclass_pd_256;
2502     else if (VecWidth == 512 && EltWidth == 64)
2503       IID = Intrinsic::x86_avx512_fpclass_pd_512;
2504     else
2505       llvm_unreachable("Unexpected intrinsic");
2506 
2507     Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2508                              {CI->getOperand(0), CI->getArgOperand(1)});
2509     Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2510   } else if (Name.starts_with("avx512.cmp.p")) {
2511     SmallVector<Value *, 4> Args(CI->args());
2512     Type *OpTy = Args[0]->getType();
2513     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2514     unsigned EltWidth = OpTy->getScalarSizeInBits();
2515     Intrinsic::ID IID;
2516     if (VecWidth == 128 && EltWidth == 32)
2517       IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2518     else if (VecWidth == 256 && EltWidth == 32)
2519       IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2520     else if (VecWidth == 512 && EltWidth == 32)
2521       IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2522     else if (VecWidth == 128 && EltWidth == 64)
2523       IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2524     else if (VecWidth == 256 && EltWidth == 64)
2525       IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2526     else if (VecWidth == 512 && EltWidth == 64)
2527       IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2528     else
2529       llvm_unreachable("Unexpected intrinsic");
2530 
2531     Value *Mask = Constant::getAllOnesValue(CI->getType());
2532     if (VecWidth == 512)
2533       std::swap(Mask, Args.back());
2534     Args.push_back(Mask);
2535 
2536     Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2537                              Args);
2538   } else if (Name.starts_with("avx512.mask.cmp.")) {
2539     // Integer compare intrinsics.
2540     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2541     Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2542   } else if (Name.starts_with("avx512.mask.ucmp.")) {
2543     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2544     Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2545   } else if (Name.starts_with("avx512.cvtb2mask.") ||
2546              Name.starts_with("avx512.cvtw2mask.") ||
2547              Name.starts_with("avx512.cvtd2mask.") ||
2548              Name.starts_with("avx512.cvtq2mask.")) {
2549     Value *Op = CI->getArgOperand(0);
2550     Value *Zero = llvm::Constant::getNullValue(Op->getType());
2551     Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2552     Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2553   } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2554              Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2555              Name.starts_with("avx512.mask.pabs")) {
2556     Rep = upgradeAbs(Builder, *CI);
2557   } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2558              Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2559              Name.starts_with("avx512.mask.pmaxs")) {
2560     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2561   } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2562              Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2563              Name.starts_with("avx512.mask.pmaxu")) {
2564     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2565   } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2566              Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2567              Name.starts_with("avx512.mask.pmins")) {
2568     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2569   } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2570              Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2571              Name.starts_with("avx512.mask.pminu")) {
2572     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2573   } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2574              Name == "avx512.pmulu.dq.512" ||
2575              Name.starts_with("avx512.mask.pmulu.dq.")) {
2576     Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2577   } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2578              Name == "avx512.pmul.dq.512" ||
2579              Name.starts_with("avx512.mask.pmul.dq.")) {
2580     Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2581   } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2582              Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2583     Rep =
2584         Builder.CreateSIToFP(CI->getArgOperand(1),
2585                              cast<VectorType>(CI->getType())->getElementType());
2586     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2587   } else if (Name == "avx512.cvtusi2sd") {
2588     Rep =
2589         Builder.CreateUIToFP(CI->getArgOperand(1),
2590                              cast<VectorType>(CI->getType())->getElementType());
2591     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2592   } else if (Name == "sse2.cvtss2sd") {
2593     Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2594     Rep = Builder.CreateFPExt(
2595         Rep, cast<VectorType>(CI->getType())->getElementType());
2596     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2597   } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2598              Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2599              Name.starts_with("avx512.mask.cvtdq2pd.") ||
2600              Name.starts_with("avx512.mask.cvtudq2pd.") ||
2601              Name.starts_with("avx512.mask.cvtdq2ps.") ||
2602              Name.starts_with("avx512.mask.cvtudq2ps.") ||
2603              Name.starts_with("avx512.mask.cvtqq2pd.") ||
2604              Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2605              Name == "avx512.mask.cvtqq2ps.256" ||
2606              Name == "avx512.mask.cvtqq2ps.512" ||
2607              Name == "avx512.mask.cvtuqq2ps.256" ||
2608              Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2609              Name == "avx.cvt.ps2.pd.256" ||
2610              Name == "avx512.mask.cvtps2pd.128" ||
2611              Name == "avx512.mask.cvtps2pd.256") {
2612     auto *DstTy = cast<FixedVectorType>(CI->getType());
2613     Rep = CI->getArgOperand(0);
2614     auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2615 
2616     unsigned NumDstElts = DstTy->getNumElements();
2617     if (NumDstElts < SrcTy->getNumElements()) {
2618       assert(NumDstElts == 2 && "Unexpected vector size");
2619       Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2620     }
2621 
2622     bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2623     bool IsUnsigned = Name.contains("cvtu");
2624     if (IsPS2PD)
2625       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2626     else if (CI->arg_size() == 4 &&
2627              (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2628               cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2629       Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2630                                      : Intrinsic::x86_avx512_sitofp_round;
2631       Function *F =
2632           Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});
2633       Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});
2634     } else {
2635       Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2636                        : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2637     }
2638 
2639     if (CI->arg_size() >= 3)
2640       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2641                           CI->getArgOperand(1));
2642   } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2643              Name.starts_with("vcvtph2ps.")) {
2644     auto *DstTy = cast<FixedVectorType>(CI->getType());
2645     Rep = CI->getArgOperand(0);
2646     auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2647     unsigned NumDstElts = DstTy->getNumElements();
2648     if (NumDstElts != SrcTy->getNumElements()) {
2649       assert(NumDstElts == 4 && "Unexpected vector size");
2650       Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2651     }
2652     Rep = Builder.CreateBitCast(
2653         Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2654     Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2655     if (CI->arg_size() >= 3)
2656       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2657                           CI->getArgOperand(1));
2658   } else if (Name.starts_with("avx512.mask.load")) {
2659     // "avx512.mask.loadu." or "avx512.mask.load."
2660     bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2661     Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2662                             CI->getArgOperand(2), Aligned);
2663   } else if (Name.starts_with("avx512.mask.expand.load.")) {
2664     auto *ResultTy = cast<FixedVectorType>(CI->getType());
2665     Type *PtrTy = ResultTy->getElementType();
2666 
2667     // Cast the pointer to element type.
2668     Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2669                                        llvm::PointerType::getUnqual(PtrTy));
2670 
2671     Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2672                                    ResultTy->getNumElements());
2673 
2674     Function *ELd = Intrinsic::getDeclaration(
2675         F->getParent(), Intrinsic::masked_expandload, ResultTy);
2676     Rep = Builder.CreateCall(ELd, {Ptr, MaskVec, CI->getOperand(1)});
2677   } else if (Name.starts_with("avx512.mask.compress.store.")) {
2678     auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2679     Type *PtrTy = ResultTy->getElementType();
2680 
2681     // Cast the pointer to element type.
2682     Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2683                                        llvm::PointerType::getUnqual(PtrTy));
2684 
2685     Value *MaskVec =
2686         getX86MaskVec(Builder, CI->getArgOperand(2),
2687                       cast<FixedVectorType>(ResultTy)->getNumElements());
2688 
2689     Function *CSt = Intrinsic::getDeclaration(
2690         F->getParent(), Intrinsic::masked_compressstore, ResultTy);
2691     Rep = Builder.CreateCall(CSt, {CI->getArgOperand(1), Ptr, MaskVec});
2692   } else if (Name.starts_with("avx512.mask.compress.") ||
2693              Name.starts_with("avx512.mask.expand.")) {
2694     auto *ResultTy = cast<FixedVectorType>(CI->getType());
2695 
2696     Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2697                                    ResultTy->getNumElements());
2698 
2699     bool IsCompress = Name[12] == 'c';
2700     Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2701                                    : Intrinsic::x86_avx512_mask_expand;
2702     Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2703     Rep = Builder.CreateCall(Intr,
2704                              {CI->getOperand(0), CI->getOperand(1), MaskVec});
2705   } else if (Name.starts_with("xop.vpcom")) {
2706     bool IsSigned;
2707     if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2708         Name.ends_with("uq"))
2709       IsSigned = false;
2710     else if (Name.ends_with("b") || Name.ends_with("w") ||
2711              Name.ends_with("d") || Name.ends_with("q"))
2712       IsSigned = true;
2713     else
2714       llvm_unreachable("Unknown suffix");
2715 
2716     unsigned Imm;
2717     if (CI->arg_size() == 3) {
2718       Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2719     } else {
2720       Name = Name.substr(9); // strip off "xop.vpcom"
2721       if (Name.starts_with("lt"))
2722         Imm = 0;
2723       else if (Name.starts_with("le"))
2724         Imm = 1;
2725       else if (Name.starts_with("gt"))
2726         Imm = 2;
2727       else if (Name.starts_with("ge"))
2728         Imm = 3;
2729       else if (Name.starts_with("eq"))
2730         Imm = 4;
2731       else if (Name.starts_with("ne"))
2732         Imm = 5;
2733       else if (Name.starts_with("false"))
2734         Imm = 6;
2735       else if (Name.starts_with("true"))
2736         Imm = 7;
2737       else
2738         llvm_unreachable("Unknown condition");
2739     }
2740 
2741     Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2742   } else if (Name.starts_with("xop.vpcmov")) {
2743     Value *Sel = CI->getArgOperand(2);
2744     Value *NotSel = Builder.CreateNot(Sel);
2745     Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2746     Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2747     Rep = Builder.CreateOr(Sel0, Sel1);
2748   } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2749              Name.starts_with("avx512.mask.prol")) {
2750     Rep = upgradeX86Rotate(Builder, *CI, false);
2751   } else if (Name.starts_with("avx512.pror") ||
2752              Name.starts_with("avx512.mask.pror")) {
2753     Rep = upgradeX86Rotate(Builder, *CI, true);
2754   } else if (Name.starts_with("avx512.vpshld.") ||
2755              Name.starts_with("avx512.mask.vpshld") ||
2756              Name.starts_with("avx512.maskz.vpshld")) {
2757     bool ZeroMask = Name[11] == 'z';
2758     Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2759   } else if (Name.starts_with("avx512.vpshrd.") ||
2760              Name.starts_with("avx512.mask.vpshrd") ||
2761              Name.starts_with("avx512.maskz.vpshrd")) {
2762     bool ZeroMask = Name[11] == 'z';
2763     Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2764   } else if (Name == "sse42.crc32.64.8") {
2765     Function *CRC32 = Intrinsic::getDeclaration(
2766         F->getParent(), Intrinsic::x86_sse42_crc32_32_8);
2767     Value *Trunc0 =
2768         Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2769     Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2770     Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2771   } else if (Name.starts_with("avx.vbroadcast.s") ||
2772              Name.starts_with("avx512.vbroadcast.s")) {
2773     // Replace broadcasts with a series of insertelements.
2774     auto *VecTy = cast<FixedVectorType>(CI->getType());
2775     Type *EltTy = VecTy->getElementType();
2776     unsigned EltNum = VecTy->getNumElements();
2777     Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2778     Type *I32Ty = Type::getInt32Ty(C);
2779     Rep = PoisonValue::get(VecTy);
2780     for (unsigned I = 0; I < EltNum; ++I)
2781       Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2782   } else if (Name.starts_with("sse41.pmovsx") ||
2783              Name.starts_with("sse41.pmovzx") ||
2784              Name.starts_with("avx2.pmovsx") ||
2785              Name.starts_with("avx2.pmovzx") ||
2786              Name.starts_with("avx512.mask.pmovsx") ||
2787              Name.starts_with("avx512.mask.pmovzx")) {
2788     auto *DstTy = cast<FixedVectorType>(CI->getType());
2789     unsigned NumDstElts = DstTy->getNumElements();
2790 
2791     // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2792     SmallVector<int, 8> ShuffleMask(NumDstElts);
2793     for (unsigned i = 0; i != NumDstElts; ++i)
2794       ShuffleMask[i] = i;
2795 
2796     Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2797 
2798     bool DoSext = Name.contains("pmovsx");
2799     Rep =
2800         DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2801     // If there are 3 arguments, it's a masked intrinsic so we need a select.
2802     if (CI->arg_size() == 3)
2803       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2804                           CI->getArgOperand(1));
2805   } else if (Name == "avx512.mask.pmov.qd.256" ||
2806              Name == "avx512.mask.pmov.qd.512" ||
2807              Name == "avx512.mask.pmov.wb.256" ||
2808              Name == "avx512.mask.pmov.wb.512") {
2809     Type *Ty = CI->getArgOperand(1)->getType();
2810     Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2811     Rep =
2812         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2813   } else if (Name.starts_with("avx.vbroadcastf128") ||
2814              Name == "avx2.vbroadcasti128") {
2815     // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2816     Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2817     unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2818     auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2819     Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2820                                           PointerType::getUnqual(VT));
2821     Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2822     if (NumSrcElts == 2)
2823       Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2824     else
2825       Rep = Builder.CreateShuffleVector(Load,
2826                                         ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2827   } else if (Name.starts_with("avx512.mask.shuf.i") ||
2828              Name.starts_with("avx512.mask.shuf.f")) {
2829     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2830     Type *VT = CI->getType();
2831     unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2832     unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2833     unsigned ControlBitsMask = NumLanes - 1;
2834     unsigned NumControlBits = NumLanes / 2;
2835     SmallVector<int, 8> ShuffleMask(0);
2836 
2837     for (unsigned l = 0; l != NumLanes; ++l) {
2838       unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2839       // We actually need the other source.
2840       if (l >= NumLanes / 2)
2841         LaneMask += NumLanes;
2842       for (unsigned i = 0; i != NumElementsInLane; ++i)
2843         ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2844     }
2845     Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2846                                       CI->getArgOperand(1), ShuffleMask);
2847     Rep =
2848         emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2849   } else if (Name.starts_with("avx512.mask.broadcastf") ||
2850              Name.starts_with("avx512.mask.broadcasti")) {
2851     unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2852                               ->getNumElements();
2853     unsigned NumDstElts =
2854         cast<FixedVectorType>(CI->getType())->getNumElements();
2855 
2856     SmallVector<int, 8> ShuffleMask(NumDstElts);
2857     for (unsigned i = 0; i != NumDstElts; ++i)
2858       ShuffleMask[i] = i % NumSrcElts;
2859 
2860     Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2861                                       CI->getArgOperand(0), ShuffleMask);
2862     Rep =
2863         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2864   } else if (Name.starts_with("avx2.pbroadcast") ||
2865              Name.starts_with("avx2.vbroadcast") ||
2866              Name.starts_with("avx512.pbroadcast") ||
2867              Name.starts_with("avx512.mask.broadcast.s")) {
2868     // Replace vp?broadcasts with a vector shuffle.
2869     Value *Op = CI->getArgOperand(0);
2870     ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2871     Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2872     SmallVector<int, 8> M;
2873     ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2874     Rep = Builder.CreateShuffleVector(Op, M);
2875 
2876     if (CI->arg_size() == 3)
2877       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2878                           CI->getArgOperand(1));
2879   } else if (Name.starts_with("sse2.padds.") ||
2880              Name.starts_with("avx2.padds.") ||
2881              Name.starts_with("avx512.padds.") ||
2882              Name.starts_with("avx512.mask.padds.")) {
2883     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2884   } else if (Name.starts_with("sse2.psubs.") ||
2885              Name.starts_with("avx2.psubs.") ||
2886              Name.starts_with("avx512.psubs.") ||
2887              Name.starts_with("avx512.mask.psubs.")) {
2888     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2889   } else if (Name.starts_with("sse2.paddus.") ||
2890              Name.starts_with("avx2.paddus.") ||
2891              Name.starts_with("avx512.mask.paddus.")) {
2892     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2893   } else if (Name.starts_with("sse2.psubus.") ||
2894              Name.starts_with("avx2.psubus.") ||
2895              Name.starts_with("avx512.mask.psubus.")) {
2896     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2897   } else if (Name.starts_with("avx512.mask.palignr.")) {
2898     Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2899                                     CI->getArgOperand(1), CI->getArgOperand(2),
2900                                     CI->getArgOperand(3), CI->getArgOperand(4),
2901                                     false);
2902   } else if (Name.starts_with("avx512.mask.valign.")) {
2903     Rep = upgradeX86ALIGNIntrinsics(
2904         Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2905         CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
2906   } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
2907     // 128/256-bit shift left specified in bits.
2908     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2909     Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2910                                      Shift / 8); // Shift is in bits.
2911   } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
2912     // 128/256-bit shift right specified in bits.
2913     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2914     Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2915                                      Shift / 8); // Shift is in bits.
2916   } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
2917              Name == "avx512.psll.dq.512") {
2918     // 128/256/512-bit shift left specified in bytes.
2919     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2920     Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2921   } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
2922              Name == "avx512.psrl.dq.512") {
2923     // 128/256/512-bit shift right specified in bytes.
2924     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2925     Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2926   } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
2927              Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
2928              Name.starts_with("avx2.pblendd.")) {
2929     Value *Op0 = CI->getArgOperand(0);
2930     Value *Op1 = CI->getArgOperand(1);
2931     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2932     auto *VecTy = cast<FixedVectorType>(CI->getType());
2933     unsigned NumElts = VecTy->getNumElements();
2934 
2935     SmallVector<int, 16> Idxs(NumElts);
2936     for (unsigned i = 0; i != NumElts; ++i)
2937       Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
2938 
2939     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2940   } else if (Name.starts_with("avx.vinsertf128.") ||
2941              Name == "avx2.vinserti128" ||
2942              Name.starts_with("avx512.mask.insert")) {
2943     Value *Op0 = CI->getArgOperand(0);
2944     Value *Op1 = CI->getArgOperand(1);
2945     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2946     unsigned DstNumElts =
2947         cast<FixedVectorType>(CI->getType())->getNumElements();
2948     unsigned SrcNumElts =
2949         cast<FixedVectorType>(Op1->getType())->getNumElements();
2950     unsigned Scale = DstNumElts / SrcNumElts;
2951 
2952     // Mask off the high bits of the immediate value; hardware ignores those.
2953     Imm = Imm % Scale;
2954 
2955     // Extend the second operand into a vector the size of the destination.
2956     SmallVector<int, 8> Idxs(DstNumElts);
2957     for (unsigned i = 0; i != SrcNumElts; ++i)
2958       Idxs[i] = i;
2959     for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2960       Idxs[i] = SrcNumElts;
2961     Rep = Builder.CreateShuffleVector(Op1, Idxs);
2962 
2963     // Insert the second operand into the first operand.
2964 
2965     // Note that there is no guarantee that instruction lowering will actually
2966     // produce a vinsertf128 instruction for the created shuffles. In
2967     // particular, the 0 immediate case involves no lane changes, so it can
2968     // be handled as a blend.
2969 
2970     // Example of shuffle mask for 32-bit elements:
2971     // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2972     // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2973 
2974     // First fill with identity mask.
2975     for (unsigned i = 0; i != DstNumElts; ++i)
2976       Idxs[i] = i;
2977     // Then replace the elements where we need to insert.
2978     for (unsigned i = 0; i != SrcNumElts; ++i)
2979       Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2980     Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2981 
2982     // If the intrinsic has a mask operand, handle that.
2983     if (CI->arg_size() == 5)
2984       Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
2985                           CI->getArgOperand(3));
2986   } else if (Name.starts_with("avx.vextractf128.") ||
2987              Name == "avx2.vextracti128" ||
2988              Name.starts_with("avx512.mask.vextract")) {
2989     Value *Op0 = CI->getArgOperand(0);
2990     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2991     unsigned DstNumElts =
2992         cast<FixedVectorType>(CI->getType())->getNumElements();
2993     unsigned SrcNumElts =
2994         cast<FixedVectorType>(Op0->getType())->getNumElements();
2995     unsigned Scale = SrcNumElts / DstNumElts;
2996 
2997     // Mask off the high bits of the immediate value; hardware ignores those.
2998     Imm = Imm % Scale;
2999 
3000     // Get indexes for the subvector of the input vector.
3001     SmallVector<int, 8> Idxs(DstNumElts);
3002     for (unsigned i = 0; i != DstNumElts; ++i) {
3003       Idxs[i] = i + (Imm * DstNumElts);
3004     }
3005     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3006 
3007     // If the intrinsic has a mask operand, handle that.
3008     if (CI->arg_size() == 4)
3009       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3010                           CI->getArgOperand(2));
3011   } else if (Name.starts_with("avx512.mask.perm.df.") ||
3012              Name.starts_with("avx512.mask.perm.di.")) {
3013     Value *Op0 = CI->getArgOperand(0);
3014     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3015     auto *VecTy = cast<FixedVectorType>(CI->getType());
3016     unsigned NumElts = VecTy->getNumElements();
3017 
3018     SmallVector<int, 8> Idxs(NumElts);
3019     for (unsigned i = 0; i != NumElts; ++i)
3020       Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3021 
3022     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3023 
3024     if (CI->arg_size() == 4)
3025       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3026                           CI->getArgOperand(2));
3027   } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3028     // The immediate permute control byte looks like this:
3029     //    [1:0] - select 128 bits from sources for low half of destination
3030     //    [2]   - ignore
3031     //    [3]   - zero low half of destination
3032     //    [5:4] - select 128 bits from sources for high half of destination
3033     //    [6]   - ignore
3034     //    [7]   - zero high half of destination
3035 
3036     uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3037 
3038     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3039     unsigned HalfSize = NumElts / 2;
3040     SmallVector<int, 8> ShuffleMask(NumElts);
3041 
3042     // Determine which operand(s) are actually in use for this instruction.
3043     Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3044     Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3045 
3046     // If needed, replace operands based on zero mask.
3047     V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3048     V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3049 
3050     // Permute low half of result.
3051     unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3052     for (unsigned i = 0; i < HalfSize; ++i)
3053       ShuffleMask[i] = StartIndex + i;
3054 
3055     // Permute high half of result.
3056     StartIndex = (Imm & 0x10) ? HalfSize : 0;
3057     for (unsigned i = 0; i < HalfSize; ++i)
3058       ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3059 
3060     Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3061 
3062   } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3063              Name.starts_with("avx512.mask.vpermil.p") ||
3064              Name.starts_with("avx512.mask.pshuf.d.")) {
3065     Value *Op0 = CI->getArgOperand(0);
3066     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3067     auto *VecTy = cast<FixedVectorType>(CI->getType());
3068     unsigned NumElts = VecTy->getNumElements();
3069     // Calculate the size of each index in the immediate.
3070     unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3071     unsigned IdxMask = ((1 << IdxSize) - 1);
3072 
3073     SmallVector<int, 8> Idxs(NumElts);
3074     // Lookup the bits for this element, wrapping around the immediate every
3075     // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3076     // to offset by the first index of each group.
3077     for (unsigned i = 0; i != NumElts; ++i)
3078       Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3079 
3080     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3081 
3082     if (CI->arg_size() == 4)
3083       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3084                           CI->getArgOperand(2));
3085   } else if (Name == "sse2.pshufl.w" ||
3086              Name.starts_with("avx512.mask.pshufl.w.")) {
3087     Value *Op0 = CI->getArgOperand(0);
3088     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3089     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3090 
3091     SmallVector<int, 16> Idxs(NumElts);
3092     for (unsigned l = 0; l != NumElts; l += 8) {
3093       for (unsigned i = 0; i != 4; ++i)
3094         Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3095       for (unsigned i = 4; i != 8; ++i)
3096         Idxs[i + l] = i + l;
3097     }
3098 
3099     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3100 
3101     if (CI->arg_size() == 4)
3102       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3103                           CI->getArgOperand(2));
3104   } else if (Name == "sse2.pshufh.w" ||
3105              Name.starts_with("avx512.mask.pshufh.w.")) {
3106     Value *Op0 = CI->getArgOperand(0);
3107     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3108     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3109 
3110     SmallVector<int, 16> Idxs(NumElts);
3111     for (unsigned l = 0; l != NumElts; l += 8) {
3112       for (unsigned i = 0; i != 4; ++i)
3113         Idxs[i + l] = i + l;
3114       for (unsigned i = 0; i != 4; ++i)
3115         Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3116     }
3117 
3118     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3119 
3120     if (CI->arg_size() == 4)
3121       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3122                           CI->getArgOperand(2));
3123   } else if (Name.starts_with("avx512.mask.shuf.p")) {
3124     Value *Op0 = CI->getArgOperand(0);
3125     Value *Op1 = CI->getArgOperand(1);
3126     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3128 
3129     unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3130     unsigned HalfLaneElts = NumLaneElts / 2;
3131 
3132     SmallVector<int, 16> Idxs(NumElts);
3133     for (unsigned i = 0; i != NumElts; ++i) {
3134       // Base index is the starting element of the lane.
3135       Idxs[i] = i - (i % NumLaneElts);
3136       // If we are half way through the lane switch to the other source.
3137       if ((i % NumLaneElts) >= HalfLaneElts)
3138         Idxs[i] += NumElts;
3139       // Now select the specific element by adding HalfLaneElts bits from
3140       // the immediate, wrapping around the immediate every 8 bits.
3141       Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3142     }
3143 
3144     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3145 
3146     Rep =
3147         emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3148   } else if (Name.starts_with("avx512.mask.movddup") ||
3149              Name.starts_with("avx512.mask.movshdup") ||
3150              Name.starts_with("avx512.mask.movsldup")) {
3151     Value *Op0 = CI->getArgOperand(0);
3152     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3153     unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3154 
3155     unsigned Offset = 0;
3156     if (Name.starts_with("avx512.mask.movshdup."))
3157       Offset = 1;
3158 
3159     SmallVector<int, 16> Idxs(NumElts);
3160     for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3161       for (unsigned i = 0; i != NumLaneElts; i += 2) {
3162         Idxs[i + l + 0] = i + l + Offset;
3163         Idxs[i + l + 1] = i + l + Offset;
3164       }
3165 
3166     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3167 
3168     Rep =
3169         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3170   } else if (Name.starts_with("avx512.mask.punpckl") ||
3171              Name.starts_with("avx512.mask.unpckl.")) {
3172     Value *Op0 = CI->getArgOperand(0);
3173     Value *Op1 = CI->getArgOperand(1);
3174     int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3175     int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3176 
3177     SmallVector<int, 64> Idxs(NumElts);
3178     for (int l = 0; l != NumElts; l += NumLaneElts)
3179       for (int i = 0; i != NumLaneElts; ++i)
3180         Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3181 
3182     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3183 
3184     Rep =
3185         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3186   } else if (Name.starts_with("avx512.mask.punpckh") ||
3187              Name.starts_with("avx512.mask.unpckh.")) {
3188     Value *Op0 = CI->getArgOperand(0);
3189     Value *Op1 = CI->getArgOperand(1);
3190     int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3191     int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3192 
3193     SmallVector<int, 64> Idxs(NumElts);
3194     for (int l = 0; l != NumElts; l += NumLaneElts)
3195       for (int i = 0; i != NumLaneElts; ++i)
3196         Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3197 
3198     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3199 
3200     Rep =
3201         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3202   } else if (Name.starts_with("avx512.mask.and.") ||
3203              Name.starts_with("avx512.mask.pand.")) {
3204     VectorType *FTy = cast<VectorType>(CI->getType());
3205     VectorType *ITy = VectorType::getInteger(FTy);
3206     Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3207                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3208     Rep = Builder.CreateBitCast(Rep, FTy);
3209     Rep =
3210         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3211   } else if (Name.starts_with("avx512.mask.andn.") ||
3212              Name.starts_with("avx512.mask.pandn.")) {
3213     VectorType *FTy = cast<VectorType>(CI->getType());
3214     VectorType *ITy = VectorType::getInteger(FTy);
3215     Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3216     Rep = Builder.CreateAnd(Rep,
3217                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3218     Rep = Builder.CreateBitCast(Rep, FTy);
3219     Rep =
3220         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3221   } else if (Name.starts_with("avx512.mask.or.") ||
3222              Name.starts_with("avx512.mask.por.")) {
3223     VectorType *FTy = cast<VectorType>(CI->getType());
3224     VectorType *ITy = VectorType::getInteger(FTy);
3225     Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3226                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3227     Rep = Builder.CreateBitCast(Rep, FTy);
3228     Rep =
3229         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3230   } else if (Name.starts_with("avx512.mask.xor.") ||
3231              Name.starts_with("avx512.mask.pxor.")) {
3232     VectorType *FTy = cast<VectorType>(CI->getType());
3233     VectorType *ITy = VectorType::getInteger(FTy);
3234     Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3235                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3236     Rep = Builder.CreateBitCast(Rep, FTy);
3237     Rep =
3238         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3239   } else if (Name.starts_with("avx512.mask.padd.")) {
3240     Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3241     Rep =
3242         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3243   } else if (Name.starts_with("avx512.mask.psub.")) {
3244     Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3245     Rep =
3246         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3247   } else if (Name.starts_with("avx512.mask.pmull.")) {
3248     Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3249     Rep =
3250         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3251   } else if (Name.starts_with("avx512.mask.add.p")) {
3252     if (Name.ends_with(".512")) {
3253       Intrinsic::ID IID;
3254       if (Name[17] == 's')
3255         IID = Intrinsic::x86_avx512_add_ps_512;
3256       else
3257         IID = Intrinsic::x86_avx512_add_pd_512;
3258 
3259       Rep = Builder.CreateCall(
3260           Intrinsic::getDeclaration(F->getParent(), IID),
3261           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3262     } else {
3263       Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3264     }
3265     Rep =
3266         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3267   } else if (Name.starts_with("avx512.mask.div.p")) {
3268     if (Name.ends_with(".512")) {
3269       Intrinsic::ID IID;
3270       if (Name[17] == 's')
3271         IID = Intrinsic::x86_avx512_div_ps_512;
3272       else
3273         IID = Intrinsic::x86_avx512_div_pd_512;
3274 
3275       Rep = Builder.CreateCall(
3276           Intrinsic::getDeclaration(F->getParent(), IID),
3277           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3278     } else {
3279       Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3280     }
3281     Rep =
3282         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3283   } else if (Name.starts_with("avx512.mask.mul.p")) {
3284     if (Name.ends_with(".512")) {
3285       Intrinsic::ID IID;
3286       if (Name[17] == 's')
3287         IID = Intrinsic::x86_avx512_mul_ps_512;
3288       else
3289         IID = Intrinsic::x86_avx512_mul_pd_512;
3290 
3291       Rep = Builder.CreateCall(
3292           Intrinsic::getDeclaration(F->getParent(), IID),
3293           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3294     } else {
3295       Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3296     }
3297     Rep =
3298         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3299   } else if (Name.starts_with("avx512.mask.sub.p")) {
3300     if (Name.ends_with(".512")) {
3301       Intrinsic::ID IID;
3302       if (Name[17] == 's')
3303         IID = Intrinsic::x86_avx512_sub_ps_512;
3304       else
3305         IID = Intrinsic::x86_avx512_sub_pd_512;
3306 
3307       Rep = Builder.CreateCall(
3308           Intrinsic::getDeclaration(F->getParent(), IID),
3309           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3310     } else {
3311       Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3312     }
3313     Rep =
3314         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3315   } else if ((Name.starts_with("avx512.mask.max.p") ||
3316               Name.starts_with("avx512.mask.min.p")) &&
3317              Name.drop_front(18) == ".512") {
3318     bool IsDouble = Name[17] == 'd';
3319     bool IsMin = Name[13] == 'i';
3320     static const Intrinsic::ID MinMaxTbl[2][2] = {
3321         {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3322         {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3323     Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3324 
3325     Rep = Builder.CreateCall(
3326         Intrinsic::getDeclaration(F->getParent(), IID),
3327         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3328     Rep =
3329         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3330   } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3331     Rep =
3332         Builder.CreateCall(Intrinsic::getDeclaration(
3333                                F->getParent(), Intrinsic::ctlz, CI->getType()),
3334                            {CI->getArgOperand(0), Builder.getInt1(false)});
3335     Rep =
3336         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3337   } else if (Name.starts_with("avx512.mask.psll")) {
3338     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3339     bool IsVariable = Name[16] == 'v';
3340     char Size = Name[16] == '.'   ? Name[17]
3341                 : Name[17] == '.' ? Name[18]
3342                 : Name[18] == '.' ? Name[19]
3343                                   : Name[20];
3344 
3345     Intrinsic::ID IID;
3346     if (IsVariable && Name[17] != '.') {
3347       if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3348         IID = Intrinsic::x86_avx2_psllv_q;
3349       else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3350         IID = Intrinsic::x86_avx2_psllv_q_256;
3351       else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3352         IID = Intrinsic::x86_avx2_psllv_d;
3353       else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3354         IID = Intrinsic::x86_avx2_psllv_d_256;
3355       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3356         IID = Intrinsic::x86_avx512_psllv_w_128;
3357       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3358         IID = Intrinsic::x86_avx512_psllv_w_256;
3359       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3360         IID = Intrinsic::x86_avx512_psllv_w_512;
3361       else
3362         llvm_unreachable("Unexpected size");
3363     } else if (Name.ends_with(".128")) {
3364       if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3365         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3366                           : Intrinsic::x86_sse2_psll_d;
3367       else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3368         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3369                           : Intrinsic::x86_sse2_psll_q;
3370       else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3371         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3372                           : Intrinsic::x86_sse2_psll_w;
3373       else
3374         llvm_unreachable("Unexpected size");
3375     } else if (Name.ends_with(".256")) {
3376       if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3377         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3378                           : Intrinsic::x86_avx2_psll_d;
3379       else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3380         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3381                           : Intrinsic::x86_avx2_psll_q;
3382       else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3383         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3384                           : Intrinsic::x86_avx2_psll_w;
3385       else
3386         llvm_unreachable("Unexpected size");
3387     } else {
3388       if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3389         IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
3390               : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3391                            : Intrinsic::x86_avx512_psll_d_512;
3392       else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3393         IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
3394               : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3395                            : Intrinsic::x86_avx512_psll_q_512;
3396       else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3397         IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3398                           : Intrinsic::x86_avx512_psll_w_512;
3399       else
3400         llvm_unreachable("Unexpected size");
3401     }
3402 
3403     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3404   } else if (Name.starts_with("avx512.mask.psrl")) {
3405     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3406     bool IsVariable = Name[16] == 'v';
3407     char Size = Name[16] == '.'   ? Name[17]
3408                 : Name[17] == '.' ? Name[18]
3409                 : Name[18] == '.' ? Name[19]
3410                                   : Name[20];
3411 
3412     Intrinsic::ID IID;
3413     if (IsVariable && Name[17] != '.') {
3414       if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3415         IID = Intrinsic::x86_avx2_psrlv_q;
3416       else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3417         IID = Intrinsic::x86_avx2_psrlv_q_256;
3418       else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3419         IID = Intrinsic::x86_avx2_psrlv_d;
3420       else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3421         IID = Intrinsic::x86_avx2_psrlv_d_256;
3422       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3423         IID = Intrinsic::x86_avx512_psrlv_w_128;
3424       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3425         IID = Intrinsic::x86_avx512_psrlv_w_256;
3426       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3427         IID = Intrinsic::x86_avx512_psrlv_w_512;
3428       else
3429         llvm_unreachable("Unexpected size");
3430     } else if (Name.ends_with(".128")) {
3431       if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3432         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3433                           : Intrinsic::x86_sse2_psrl_d;
3434       else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3435         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3436                           : Intrinsic::x86_sse2_psrl_q;
3437       else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3438         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3439                           : Intrinsic::x86_sse2_psrl_w;
3440       else
3441         llvm_unreachable("Unexpected size");
3442     } else if (Name.ends_with(".256")) {
3443       if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3444         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3445                           : Intrinsic::x86_avx2_psrl_d;
3446       else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3447         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3448                           : Intrinsic::x86_avx2_psrl_q;
3449       else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3450         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3451                           : Intrinsic::x86_avx2_psrl_w;
3452       else
3453         llvm_unreachable("Unexpected size");
3454     } else {
3455       if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3456         IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
3457               : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3458                            : Intrinsic::x86_avx512_psrl_d_512;
3459       else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3460         IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
3461               : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3462                            : Intrinsic::x86_avx512_psrl_q_512;
3463       else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3464         IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3465                           : Intrinsic::x86_avx512_psrl_w_512;
3466       else
3467         llvm_unreachable("Unexpected size");
3468     }
3469 
3470     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3471   } else if (Name.starts_with("avx512.mask.psra")) {
3472     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3473     bool IsVariable = Name[16] == 'v';
3474     char Size = Name[16] == '.'   ? Name[17]
3475                 : Name[17] == '.' ? Name[18]
3476                 : Name[18] == '.' ? Name[19]
3477                                   : Name[20];
3478 
3479     Intrinsic::ID IID;
3480     if (IsVariable && Name[17] != '.') {
3481       if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3482         IID = Intrinsic::x86_avx2_psrav_d;
3483       else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3484         IID = Intrinsic::x86_avx2_psrav_d_256;
3485       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3486         IID = Intrinsic::x86_avx512_psrav_w_128;
3487       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3488         IID = Intrinsic::x86_avx512_psrav_w_256;
3489       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3490         IID = Intrinsic::x86_avx512_psrav_w_512;
3491       else
3492         llvm_unreachable("Unexpected size");
3493     } else if (Name.ends_with(".128")) {
3494       if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3495         IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3496                           : Intrinsic::x86_sse2_psra_d;
3497       else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3498         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
3499               : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3500                            : Intrinsic::x86_avx512_psra_q_128;
3501       else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3502         IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3503                           : Intrinsic::x86_sse2_psra_w;
3504       else
3505         llvm_unreachable("Unexpected size");
3506     } else if (Name.ends_with(".256")) {
3507       if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3508         IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3509                           : Intrinsic::x86_avx2_psra_d;
3510       else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3511         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
3512               : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3513                            : Intrinsic::x86_avx512_psra_q_256;
3514       else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3515         IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3516                           : Intrinsic::x86_avx2_psra_w;
3517       else
3518         llvm_unreachable("Unexpected size");
3519     } else {
3520       if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3521         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
3522               : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3523                            : Intrinsic::x86_avx512_psra_d_512;
3524       else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3525         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
3526               : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3527                            : Intrinsic::x86_avx512_psra_q_512;
3528       else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3529         IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3530                           : Intrinsic::x86_avx512_psra_w_512;
3531       else
3532         llvm_unreachable("Unexpected size");
3533     }
3534 
3535     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3536   } else if (Name.starts_with("avx512.mask.move.s")) {
3537     Rep = upgradeMaskedMove(Builder, *CI);
3538   } else if (Name.starts_with("avx512.cvtmask2")) {
3539     Rep = upgradeMaskToInt(Builder, *CI);
3540   } else if (Name.ends_with(".movntdqa")) {
3541     MDNode *Node = MDNode::get(
3542         C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3543 
3544     Value *Ptr = CI->getArgOperand(0);
3545 
3546     // Convert the type of the pointer to a pointer to the stored type.
3547     Value *BC = Builder.CreateBitCast(
3548         Ptr, PointerType::getUnqual(CI->getType()), "cast");
3549     LoadInst *LI = Builder.CreateAlignedLoad(
3550         CI->getType(), BC,
3551         Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3552     LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3553     Rep = LI;
3554   } else if (Name.starts_with("fma.vfmadd.") ||
3555              Name.starts_with("fma.vfmsub.") ||
3556              Name.starts_with("fma.vfnmadd.") ||
3557              Name.starts_with("fma.vfnmsub.")) {
3558     bool NegMul = Name[6] == 'n';
3559     bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3560     bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3561 
3562     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3563                     CI->getArgOperand(2)};
3564 
3565     if (IsScalar) {
3566       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3567       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3568       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3569     }
3570 
3571     if (NegMul && !IsScalar)
3572       Ops[0] = Builder.CreateFNeg(Ops[0]);
3573     if (NegMul && IsScalar)
3574       Ops[1] = Builder.CreateFNeg(Ops[1]);
3575     if (NegAcc)
3576       Ops[2] = Builder.CreateFNeg(Ops[2]);
3577 
3578     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3579                                                        Intrinsic::fma,
3580                                                        Ops[0]->getType()),
3581                              Ops);
3582 
3583     if (IsScalar)
3584       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3585   } else if (Name.starts_with("fma4.vfmadd.s")) {
3586     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3587                     CI->getArgOperand(2)};
3588 
3589     Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3590     Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3591     Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3592 
3593     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3594                                                        Intrinsic::fma,
3595                                                        Ops[0]->getType()),
3596                              Ops);
3597 
3598     Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3599                                       Rep, (uint64_t)0);
3600   } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3601              Name.starts_with("avx512.maskz.vfmadd.s") ||
3602              Name.starts_with("avx512.mask3.vfmadd.s") ||
3603              Name.starts_with("avx512.mask3.vfmsub.s") ||
3604              Name.starts_with("avx512.mask3.vfnmsub.s")) {
3605     bool IsMask3 = Name[11] == '3';
3606     bool IsMaskZ = Name[11] == 'z';
3607     // Drop the "avx512.mask." to make it easier.
3608     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3609     bool NegMul = Name[2] == 'n';
3610     bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3611 
3612     Value *A = CI->getArgOperand(0);
3613     Value *B = CI->getArgOperand(1);
3614     Value *C = CI->getArgOperand(2);
3615 
3616     if (NegMul && (IsMask3 || IsMaskZ))
3617       A = Builder.CreateFNeg(A);
3618     if (NegMul && !(IsMask3 || IsMaskZ))
3619       B = Builder.CreateFNeg(B);
3620     if (NegAcc)
3621       C = Builder.CreateFNeg(C);
3622 
3623     A = Builder.CreateExtractElement(A, (uint64_t)0);
3624     B = Builder.CreateExtractElement(B, (uint64_t)0);
3625     C = Builder.CreateExtractElement(C, (uint64_t)0);
3626 
3627     if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3628         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3629       Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3630 
3631       Intrinsic::ID IID;
3632       if (Name.back() == 'd')
3633         IID = Intrinsic::x86_avx512_vfmadd_f64;
3634       else
3635         IID = Intrinsic::x86_avx512_vfmadd_f32;
3636       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3637       Rep = Builder.CreateCall(FMA, Ops);
3638     } else {
3639       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3640                                                 A->getType());
3641       Rep = Builder.CreateCall(FMA, {A, B, C});
3642     }
3643 
3644     Value *PassThru = IsMaskZ   ? Constant::getNullValue(Rep->getType())
3645                       : IsMask3 ? C
3646                                 : A;
3647 
3648     // For Mask3 with NegAcc, we need to create a new extractelement that
3649     // avoids the negation above.
3650     if (NegAcc && IsMask3)
3651       PassThru =
3652           Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3653 
3654     Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3655     Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3656                                       (uint64_t)0);
3657   } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3658              Name.starts_with("avx512.mask.vfnmadd.p") ||
3659              Name.starts_with("avx512.mask.vfnmsub.p") ||
3660              Name.starts_with("avx512.mask3.vfmadd.p") ||
3661              Name.starts_with("avx512.mask3.vfmsub.p") ||
3662              Name.starts_with("avx512.mask3.vfnmsub.p") ||
3663              Name.starts_with("avx512.maskz.vfmadd.p")) {
3664     bool IsMask3 = Name[11] == '3';
3665     bool IsMaskZ = Name[11] == 'z';
3666     // Drop the "avx512.mask." to make it easier.
3667     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3668     bool NegMul = Name[2] == 'n';
3669     bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3670 
3671     Value *A = CI->getArgOperand(0);
3672     Value *B = CI->getArgOperand(1);
3673     Value *C = CI->getArgOperand(2);
3674 
3675     if (NegMul && (IsMask3 || IsMaskZ))
3676       A = Builder.CreateFNeg(A);
3677     if (NegMul && !(IsMask3 || IsMaskZ))
3678       B = Builder.CreateFNeg(B);
3679     if (NegAcc)
3680       C = Builder.CreateFNeg(C);
3681 
3682     if (CI->arg_size() == 5 &&
3683         (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3684          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3685       Intrinsic::ID IID;
3686       // Check the character before ".512" in string.
3687       if (Name[Name.size() - 5] == 's')
3688         IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3689       else
3690         IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3691 
3692       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3693                                {A, B, C, CI->getArgOperand(4)});
3694     } else {
3695       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3696                                                 A->getType());
3697       Rep = Builder.CreateCall(FMA, {A, B, C});
3698     }
3699 
3700     Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
3701                       : IsMask3 ? CI->getArgOperand(2)
3702                                 : CI->getArgOperand(0);
3703 
3704     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3705   } else if (Name.starts_with("fma.vfmsubadd.p")) {
3706     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3707     unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3708     Intrinsic::ID IID;
3709     if (VecWidth == 128 && EltWidth == 32)
3710       IID = Intrinsic::x86_fma_vfmaddsub_ps;
3711     else if (VecWidth == 256 && EltWidth == 32)
3712       IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3713     else if (VecWidth == 128 && EltWidth == 64)
3714       IID = Intrinsic::x86_fma_vfmaddsub_pd;
3715     else if (VecWidth == 256 && EltWidth == 64)
3716       IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3717     else
3718       llvm_unreachable("Unexpected intrinsic");
3719 
3720     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3721                     CI->getArgOperand(2)};
3722     Ops[2] = Builder.CreateFNeg(Ops[2]);
3723     Rep =
3724         Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops);
3725   } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3726              Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3727              Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3728              Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3729     bool IsMask3 = Name[11] == '3';
3730     bool IsMaskZ = Name[11] == 'z';
3731     // Drop the "avx512.mask." to make it easier.
3732     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3733     bool IsSubAdd = Name[3] == 's';
3734     if (CI->arg_size() == 5) {
3735       Intrinsic::ID IID;
3736       // Check the character before ".512" in string.
3737       if (Name[Name.size() - 5] == 's')
3738         IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3739       else
3740         IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3741 
3742       Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3743                       CI->getArgOperand(2), CI->getArgOperand(4)};
3744       if (IsSubAdd)
3745         Ops[2] = Builder.CreateFNeg(Ops[2]);
3746 
3747       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3748                                Ops);
3749     } else {
3750       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3751 
3752       Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3753                       CI->getArgOperand(2)};
3754 
3755       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3756                                                 Ops[0]->getType());
3757       Value *Odd = Builder.CreateCall(FMA, Ops);
3758       Ops[2] = Builder.CreateFNeg(Ops[2]);
3759       Value *Even = Builder.CreateCall(FMA, Ops);
3760 
3761       if (IsSubAdd)
3762         std::swap(Even, Odd);
3763 
3764       SmallVector<int, 32> Idxs(NumElts);
3765       for (int i = 0; i != NumElts; ++i)
3766         Idxs[i] = i + (i % 2) * NumElts;
3767 
3768       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3769     }
3770 
3771     Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
3772                       : IsMask3 ? CI->getArgOperand(2)
3773                                 : CI->getArgOperand(0);
3774 
3775     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3776   } else if (Name.starts_with("avx512.mask.pternlog.") ||
3777              Name.starts_with("avx512.maskz.pternlog.")) {
3778     bool ZeroMask = Name[11] == 'z';
3779     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3780     unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3781     Intrinsic::ID IID;
3782     if (VecWidth == 128 && EltWidth == 32)
3783       IID = Intrinsic::x86_avx512_pternlog_d_128;
3784     else if (VecWidth == 256 && EltWidth == 32)
3785       IID = Intrinsic::x86_avx512_pternlog_d_256;
3786     else if (VecWidth == 512 && EltWidth == 32)
3787       IID = Intrinsic::x86_avx512_pternlog_d_512;
3788     else if (VecWidth == 128 && EltWidth == 64)
3789       IID = Intrinsic::x86_avx512_pternlog_q_128;
3790     else if (VecWidth == 256 && EltWidth == 64)
3791       IID = Intrinsic::x86_avx512_pternlog_q_256;
3792     else if (VecWidth == 512 && EltWidth == 64)
3793       IID = Intrinsic::x86_avx512_pternlog_q_512;
3794     else
3795       llvm_unreachable("Unexpected intrinsic");
3796 
3797     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3798                      CI->getArgOperand(2), CI->getArgOperand(3)};
3799     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3800                              Args);
3801     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3802                                : CI->getArgOperand(0);
3803     Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3804   } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3805              Name.starts_with("avx512.maskz.vpmadd52")) {
3806     bool ZeroMask = Name[11] == 'z';
3807     bool High = Name[20] == 'h' || Name[21] == 'h';
3808     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3809     Intrinsic::ID IID;
3810     if (VecWidth == 128 && !High)
3811       IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3812     else if (VecWidth == 256 && !High)
3813       IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3814     else if (VecWidth == 512 && !High)
3815       IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3816     else if (VecWidth == 128 && High)
3817       IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3818     else if (VecWidth == 256 && High)
3819       IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3820     else if (VecWidth == 512 && High)
3821       IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3822     else
3823       llvm_unreachable("Unexpected intrinsic");
3824 
3825     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3826                      CI->getArgOperand(2)};
3827     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3828                              Args);
3829     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3830                                : CI->getArgOperand(0);
3831     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3832   } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3833              Name.starts_with("avx512.mask.vpermt2var.") ||
3834              Name.starts_with("avx512.maskz.vpermt2var.")) {
3835     bool ZeroMask = Name[11] == 'z';
3836     bool IndexForm = Name[17] == 'i';
3837     Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3838   } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3839              Name.starts_with("avx512.maskz.vpdpbusd.") ||
3840              Name.starts_with("avx512.mask.vpdpbusds.") ||
3841              Name.starts_with("avx512.maskz.vpdpbusds.")) {
3842     bool ZeroMask = Name[11] == 'z';
3843     bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3844     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3845     Intrinsic::ID IID;
3846     if (VecWidth == 128 && !IsSaturating)
3847       IID = Intrinsic::x86_avx512_vpdpbusd_128;
3848     else if (VecWidth == 256 && !IsSaturating)
3849       IID = Intrinsic::x86_avx512_vpdpbusd_256;
3850     else if (VecWidth == 512 && !IsSaturating)
3851       IID = Intrinsic::x86_avx512_vpdpbusd_512;
3852     else if (VecWidth == 128 && IsSaturating)
3853       IID = Intrinsic::x86_avx512_vpdpbusds_128;
3854     else if (VecWidth == 256 && IsSaturating)
3855       IID = Intrinsic::x86_avx512_vpdpbusds_256;
3856     else if (VecWidth == 512 && IsSaturating)
3857       IID = Intrinsic::x86_avx512_vpdpbusds_512;
3858     else
3859       llvm_unreachable("Unexpected intrinsic");
3860 
3861     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3862                      CI->getArgOperand(2)};
3863     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3864                              Args);
3865     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3866                                : CI->getArgOperand(0);
3867     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3868   } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3869              Name.starts_with("avx512.maskz.vpdpwssd.") ||
3870              Name.starts_with("avx512.mask.vpdpwssds.") ||
3871              Name.starts_with("avx512.maskz.vpdpwssds.")) {
3872     bool ZeroMask = Name[11] == 'z';
3873     bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3874     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3875     Intrinsic::ID IID;
3876     if (VecWidth == 128 && !IsSaturating)
3877       IID = Intrinsic::x86_avx512_vpdpwssd_128;
3878     else if (VecWidth == 256 && !IsSaturating)
3879       IID = Intrinsic::x86_avx512_vpdpwssd_256;
3880     else if (VecWidth == 512 && !IsSaturating)
3881       IID = Intrinsic::x86_avx512_vpdpwssd_512;
3882     else if (VecWidth == 128 && IsSaturating)
3883       IID = Intrinsic::x86_avx512_vpdpwssds_128;
3884     else if (VecWidth == 256 && IsSaturating)
3885       IID = Intrinsic::x86_avx512_vpdpwssds_256;
3886     else if (VecWidth == 512 && IsSaturating)
3887       IID = Intrinsic::x86_avx512_vpdpwssds_512;
3888     else
3889       llvm_unreachable("Unexpected intrinsic");
3890 
3891     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3892                      CI->getArgOperand(2)};
3893     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3894                              Args);
3895     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3896                                : CI->getArgOperand(0);
3897     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3898   } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3899              Name == "addcarry.u32" || Name == "addcarry.u64" ||
3900              Name == "subborrow.u32" || Name == "subborrow.u64") {
3901     Intrinsic::ID IID;
3902     if (Name[0] == 'a' && Name.back() == '2')
3903       IID = Intrinsic::x86_addcarry_32;
3904     else if (Name[0] == 'a' && Name.back() == '4')
3905       IID = Intrinsic::x86_addcarry_64;
3906     else if (Name[0] == 's' && Name.back() == '2')
3907       IID = Intrinsic::x86_subborrow_32;
3908     else if (Name[0] == 's' && Name.back() == '4')
3909       IID = Intrinsic::x86_subborrow_64;
3910     else
3911       llvm_unreachable("Unexpected intrinsic");
3912 
3913     // Make a call with 3 operands.
3914     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3915                      CI->getArgOperand(2)};
3916     Value *NewCall = Builder.CreateCall(
3917         Intrinsic::getDeclaration(CI->getModule(), IID), Args);
3918 
3919     // Extract the second result and store it.
3920     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3921     // Cast the pointer to the right type.
3922     Value *Ptr = Builder.CreateBitCast(
3923         CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
3924     Builder.CreateAlignedStore(Data, Ptr, Align(1));
3925     // Replace the original call result with the first result of the new call.
3926     Value *CF = Builder.CreateExtractValue(NewCall, 0);
3927 
3928     CI->replaceAllUsesWith(CF);
3929     Rep = nullptr;
3930   } else if (Name.starts_with("avx512.mask.") &&
3931              upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3932     // Rep will be updated by the call in the condition.
3933   }
3934 
3935   return Rep;
3936 }
3937 
upgradeARMIntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)3938 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
3939                                       IRBuilder<> &Builder) {
3940   if (Name == "mve.vctp64.old") {
3941     // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
3942     // correct type.
3943     Value *VCTP = Builder.CreateCall(
3944         Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
3945         CI->getArgOperand(0), CI->getName());
3946     Value *C1 = Builder.CreateCall(
3947         Intrinsic::getDeclaration(
3948             F->getParent(), Intrinsic::arm_mve_pred_v2i,
3949             {VectorType::get(Builder.getInt1Ty(), 2, false)}),
3950         VCTP);
3951     return Builder.CreateCall(
3952         Intrinsic::getDeclaration(
3953             F->getParent(), Intrinsic::arm_mve_pred_i2v,
3954             {VectorType::get(Builder.getInt1Ty(), 4, false)}),
3955         C1);
3956   } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
3957              Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
3958              Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
3959              Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
3960              Name ==
3961                  "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
3962              Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
3963              Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
3964              Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
3965              Name ==
3966                  "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
3967              Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
3968              Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
3969              Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
3970              Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
3971              Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
3972              Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
3973              Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
3974     std::vector<Type *> Tys;
3975     unsigned ID = CI->getIntrinsicID();
3976     Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
3977     switch (ID) {
3978     case Intrinsic::arm_mve_mull_int_predicated:
3979     case Intrinsic::arm_mve_vqdmull_predicated:
3980     case Intrinsic::arm_mve_vldr_gather_base_predicated:
3981       Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
3982       break;
3983     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
3984     case Intrinsic::arm_mve_vstr_scatter_base_predicated:
3985     case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
3986       Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
3987              V2I1Ty};
3988       break;
3989     case Intrinsic::arm_mve_vldr_gather_offset_predicated:
3990       Tys = {CI->getType(), CI->getOperand(0)->getType(),
3991              CI->getOperand(1)->getType(), V2I1Ty};
3992       break;
3993     case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
3994       Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
3995              CI->getOperand(2)->getType(), V2I1Ty};
3996       break;
3997     case Intrinsic::arm_cde_vcx1q_predicated:
3998     case Intrinsic::arm_cde_vcx1qa_predicated:
3999     case Intrinsic::arm_cde_vcx2q_predicated:
4000     case Intrinsic::arm_cde_vcx2qa_predicated:
4001     case Intrinsic::arm_cde_vcx3q_predicated:
4002     case Intrinsic::arm_cde_vcx3qa_predicated:
4003       Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4004       break;
4005     default:
4006       llvm_unreachable("Unhandled Intrinsic!");
4007     }
4008 
4009     std::vector<Value *> Ops;
4010     for (Value *Op : CI->args()) {
4011       Type *Ty = Op->getType();
4012       if (Ty->getScalarSizeInBits() == 1) {
4013         Value *C1 = Builder.CreateCall(
4014             Intrinsic::getDeclaration(
4015                 F->getParent(), Intrinsic::arm_mve_pred_v2i,
4016                 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
4017             Op);
4018         Op = Builder.CreateCall(
4019             Intrinsic::getDeclaration(F->getParent(),
4020                                       Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
4021             C1);
4022       }
4023       Ops.push_back(Op);
4024     }
4025 
4026     Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
4027     return Builder.CreateCall(Fn, Ops, CI->getName());
4028   }
4029   llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4030 }
4031 
4032 // These are expected to have the arguments:
4033 // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4034 //
4035 // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4036 //
upgradeAMDGCNIntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)4037 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4038                                          Function *F, IRBuilder<> &Builder) {
4039   AtomicRMWInst::BinOp RMWOp =
4040       StringSwitch<AtomicRMWInst::BinOp>(Name)
4041           .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4042           .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4043           .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4044           .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4045           .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
4046 
4047   unsigned NumOperands = CI->getNumOperands();
4048   if (NumOperands < 3) // Malformed bitcode.
4049     return nullptr;
4050 
4051   Value *Ptr = CI->getArgOperand(0);
4052   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4053   if (!PtrTy) // Malformed.
4054     return nullptr;
4055 
4056   Value *Val = CI->getArgOperand(1);
4057   if (Val->getType() != CI->getType()) // Malformed.
4058     return nullptr;
4059 
4060   ConstantInt *OrderArg = nullptr;
4061   bool IsVolatile = false;
4062 
4063   // These should have 5 arguments (plus the callee). A separate version of the
4064   // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4065   if (NumOperands > 3)
4066     OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4067 
4068   // Ignore scope argument at 3
4069 
4070   if (NumOperands > 5) {
4071     ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4072     IsVolatile = !VolatileArg || !VolatileArg->isZero();
4073   }
4074 
4075   AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4076   if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4077     Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4078   if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4079     Order = AtomicOrdering::SequentiallyConsistent;
4080 
4081   LLVMContext &Ctx = F->getContext();
4082 
4083   // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4084   Type *RetTy = CI->getType();
4085   if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4086     if (VT->getElementType()->isIntegerTy(16)) {
4087       VectorType *AsBF16 =
4088           VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4089       Val = Builder.CreateBitCast(Val, AsBF16);
4090     }
4091   }
4092 
4093   // The scope argument never really worked correctly. Use agent as the most
4094   // conservative option which should still always produce the instruction.
4095   SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4096   AtomicRMWInst *RMW =
4097       Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4098 
4099   if (PtrTy->getAddressSpace() != 3) {
4100     RMW->setMetadata("amdgpu.no.fine.grained.memory",
4101                      MDNode::get(F->getContext(), {}));
4102   }
4103 
4104   if (IsVolatile)
4105     RMW->setVolatile(true);
4106 
4107   return Builder.CreateBitCast(RMW, RetTy);
4108 }
4109 
4110 /// Helper to unwrap intrinsic call MetadataAsValue operands.
4111 template <typename MDType>
unwrapMAVOp(CallBase * CI,unsigned Op)4112 static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4113   if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4114     return dyn_cast<MDType>(MAV->getMetadata());
4115   return nullptr;
4116 }
4117 
4118 /// Convert debug intrinsic calls to non-instruction debug records.
4119 /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4120 /// \p CI - The debug intrinsic call.
upgradeDbgIntrinsicToDbgRecord(StringRef Name,CallBase * CI)4121 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4122   DbgRecord *DR = nullptr;
4123   if (Name == "label") {
4124     DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4125   } else if (Name == "assign") {
4126     DR = new DbgVariableRecord(
4127         unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4128         unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4129         unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4130         CI->getDebugLoc());
4131   } else if (Name == "declare") {
4132     DR = new DbgVariableRecord(
4133         unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4134         unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4135         DbgVariableRecord::LocationType::Declare);
4136   } else if (Name == "addr") {
4137     // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4138     DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4139     Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4140     DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4141                                unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4142                                CI->getDebugLoc());
4143   } else if (Name == "value") {
4144     // An old version of dbg.value had an extra offset argument.
4145     unsigned VarOp = 1;
4146     unsigned ExprOp = 2;
4147     if (CI->arg_size() == 4) {
4148       auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4149       // Nonzero offset dbg.values get dropped without a replacement.
4150       if (!Offset || !Offset->isZeroValue())
4151         return;
4152       VarOp = 2;
4153       ExprOp = 3;
4154     }
4155     DR = new DbgVariableRecord(
4156         unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4157         unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4158   }
4159   assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4160   CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4161 }
4162 
4163 /// Upgrade a call to an old intrinsic. All argument and return casting must be
4164 /// provided to seamlessly integrate with existing context.
UpgradeIntrinsicCall(CallBase * CI,Function * NewFn)4165 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4166   // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4167   // checks the callee's function type matches. It's likely we need to handle
4168   // type changes here.
4169   Function *F = dyn_cast<Function>(CI->getCalledOperand());
4170   if (!F)
4171     return;
4172 
4173   LLVMContext &C = CI->getContext();
4174   IRBuilder<> Builder(C);
4175   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4176 
4177   if (!NewFn) {
4178     bool FallthroughToDefaultUpgrade = false;
4179     // Get the Function's name.
4180     StringRef Name = F->getName();
4181 
4182     assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4183     Name = Name.substr(5);
4184 
4185     bool IsX86 = Name.consume_front("x86.");
4186     bool IsNVVM = Name.consume_front("nvvm.");
4187     bool IsARM = Name.consume_front("arm.");
4188     bool IsAMDGCN = Name.consume_front("amdgcn.");
4189     bool IsDbg = Name.consume_front("dbg.");
4190     Value *Rep = nullptr;
4191 
4192     if (!IsX86 && Name == "stackprotectorcheck") {
4193       Rep = nullptr;
4194     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4195       Value *Arg = CI->getArgOperand(0);
4196       Value *Neg = Builder.CreateNeg(Arg, "neg");
4197       Value *Cmp = Builder.CreateICmpSGE(
4198           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4199       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4200     } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4201                           Name.starts_with("atomic.load.add.f64.p"))) {
4202       Value *Ptr = CI->getArgOperand(0);
4203       Value *Val = CI->getArgOperand(1);
4204       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4205                                     AtomicOrdering::SequentiallyConsistent);
4206     } else if (IsNVVM && Name.consume_front("max.") &&
4207                (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4208                 Name == "ui" || Name == "ull")) {
4209       Value *Arg0 = CI->getArgOperand(0);
4210       Value *Arg1 = CI->getArgOperand(1);
4211       Value *Cmp = Name.starts_with("u")
4212                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4213                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4214       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4215     } else if (IsNVVM && Name.consume_front("min.") &&
4216                (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4217                 Name == "ui" || Name == "ull")) {
4218       Value *Arg0 = CI->getArgOperand(0);
4219       Value *Arg1 = CI->getArgOperand(1);
4220       Value *Cmp = Name.starts_with("u")
4221                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4222                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4223       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4224     } else if (IsNVVM && Name == "clz.ll") {
4225       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4226       Value *Arg = CI->getArgOperand(0);
4227       Value *Ctlz = Builder.CreateCall(
4228           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4229                                     {Arg->getType()}),
4230           {Arg, Builder.getFalse()}, "ctlz");
4231       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4232     } else if (IsNVVM && Name == "popc.ll") {
4233       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4234       // i64.
4235       Value *Arg = CI->getArgOperand(0);
4236       Value *Popc = Builder.CreateCall(
4237           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4238                                     {Arg->getType()}),
4239           Arg, "ctpop");
4240       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4241     } else if (IsNVVM) {
4242       if (Name == "h2f") {
4243         Rep =
4244             Builder.CreateCall(Intrinsic::getDeclaration(
4245                                    F->getParent(), Intrinsic::convert_from_fp16,
4246                                    {Builder.getFloatTy()}),
4247                                CI->getArgOperand(0), "h2f");
4248       } else {
4249         Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4250         if (IID != Intrinsic::not_intrinsic &&
4251             !F->getReturnType()->getScalarType()->isBFloatTy()) {
4252           rename(F);
4253           NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4254           SmallVector<Value *, 2> Args;
4255           for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4256             Value *Arg = CI->getArgOperand(I);
4257             Type *OldType = Arg->getType();
4258             Type *NewType = NewFn->getArg(I)->getType();
4259             Args.push_back((OldType->isIntegerTy() &&
4260                             NewType->getScalarType()->isBFloatTy())
4261                                ? Builder.CreateBitCast(Arg, NewType)
4262                                : Arg);
4263           }
4264           Rep = Builder.CreateCall(NewFn, Args);
4265           if (F->getReturnType()->isIntegerTy())
4266             Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4267         }
4268       }
4269     } else if (IsX86) {
4270       Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4271     } else if (IsARM) {
4272       Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4273     } else if (IsAMDGCN) {
4274       Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4275     } else if (IsDbg) {
4276       // We might have decided we don't want the new format after all between
4277       // first requesting the upgrade and now; skip the conversion if that is
4278       // the case, and check here to see if the intrinsic needs to be upgraded
4279       // normally.
4280       if (!CI->getModule()->IsNewDbgInfoFormat) {
4281         bool NeedsUpgrade =
4282             upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4283         if (!NeedsUpgrade)
4284           return;
4285         FallthroughToDefaultUpgrade = true;
4286       } else {
4287         upgradeDbgIntrinsicToDbgRecord(Name, CI);
4288       }
4289     } else {
4290       llvm_unreachable("Unknown function for CallBase upgrade.");
4291     }
4292 
4293     if (!FallthroughToDefaultUpgrade) {
4294       if (Rep)
4295         CI->replaceAllUsesWith(Rep);
4296       CI->eraseFromParent();
4297       return;
4298     }
4299   }
4300 
4301   const auto &DefaultCase = [&]() -> void {
4302     if (CI->getFunctionType() == NewFn->getFunctionType()) {
4303       // Handle generic mangling change.
4304       assert(
4305           (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4306           "Unknown function for CallBase upgrade and isn't just a name change");
4307       CI->setCalledFunction(NewFn);
4308       return;
4309     }
4310 
4311     // This must be an upgrade from a named to a literal struct.
4312     if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4313       assert(OldST != NewFn->getReturnType() &&
4314              "Return type must have changed");
4315       assert(OldST->getNumElements() ==
4316                  cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4317              "Must have same number of elements");
4318 
4319       SmallVector<Value *> Args(CI->args());
4320       Value *NewCI = Builder.CreateCall(NewFn, Args);
4321       Value *Res = PoisonValue::get(OldST);
4322       for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4323         Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4324         Res = Builder.CreateInsertValue(Res, Elem, Idx);
4325       }
4326       CI->replaceAllUsesWith(Res);
4327       CI->eraseFromParent();
4328       return;
4329     }
4330 
4331     // We're probably about to produce something invalid. Let the verifier catch
4332     // it instead of dying here.
4333     CI->setCalledOperand(
4334         ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4335     return;
4336   };
4337   CallInst *NewCall = nullptr;
4338   switch (NewFn->getIntrinsicID()) {
4339   default: {
4340     DefaultCase();
4341     return;
4342   }
4343   case Intrinsic::arm_neon_vst1:
4344   case Intrinsic::arm_neon_vst2:
4345   case Intrinsic::arm_neon_vst3:
4346   case Intrinsic::arm_neon_vst4:
4347   case Intrinsic::arm_neon_vst2lane:
4348   case Intrinsic::arm_neon_vst3lane:
4349   case Intrinsic::arm_neon_vst4lane: {
4350     SmallVector<Value *, 4> Args(CI->args());
4351     NewCall = Builder.CreateCall(NewFn, Args);
4352     break;
4353   }
4354   case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4355   case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4356   case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4357     LLVMContext &Ctx = F->getParent()->getContext();
4358     SmallVector<Value *, 4> Args(CI->args());
4359     Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4360                                cast<ConstantInt>(Args[3])->getZExtValue());
4361     NewCall = Builder.CreateCall(NewFn, Args);
4362     break;
4363   }
4364   case Intrinsic::aarch64_sve_ld3_sret:
4365   case Intrinsic::aarch64_sve_ld4_sret:
4366   case Intrinsic::aarch64_sve_ld2_sret: {
4367     StringRef Name = F->getName();
4368     Name = Name.substr(5);
4369     unsigned N = StringSwitch<unsigned>(Name)
4370                      .StartsWith("aarch64.sve.ld2", 2)
4371                      .StartsWith("aarch64.sve.ld3", 3)
4372                      .StartsWith("aarch64.sve.ld4", 4)
4373                      .Default(0);
4374     auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4375     unsigned MinElts = RetTy->getMinNumElements() / N;
4376     SmallVector<Value *, 2> Args(CI->args());
4377     Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4378     Value *Ret = llvm::PoisonValue::get(RetTy);
4379     for (unsigned I = 0; I < N; I++) {
4380       Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4381       Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4382       Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4383     }
4384     NewCall = dyn_cast<CallInst>(Ret);
4385     break;
4386   }
4387 
4388   case Intrinsic::coro_end: {
4389     SmallVector<Value *, 3> Args(CI->args());
4390     Args.push_back(ConstantTokenNone::get(CI->getContext()));
4391     NewCall = Builder.CreateCall(NewFn, Args);
4392     break;
4393   }
4394 
4395   case Intrinsic::vector_extract: {
4396     StringRef Name = F->getName();
4397     Name = Name.substr(5); // Strip llvm
4398     if (!Name.starts_with("aarch64.sve.tuple.get")) {
4399       DefaultCase();
4400       return;
4401     }
4402     auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4403     unsigned MinElts = RetTy->getMinNumElements();
4404     unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4405     Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4406     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4407     break;
4408   }
4409 
4410   case Intrinsic::vector_insert: {
4411     StringRef Name = F->getName();
4412     Name = Name.substr(5);
4413     if (!Name.starts_with("aarch64.sve.tuple")) {
4414       DefaultCase();
4415       return;
4416     }
4417     if (Name.starts_with("aarch64.sve.tuple.set")) {
4418       unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4419       auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4420       Value *NewIdx =
4421           ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4422       NewCall = Builder.CreateCall(
4423           NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4424       break;
4425     }
4426     if (Name.starts_with("aarch64.sve.tuple.create")) {
4427       unsigned N = StringSwitch<unsigned>(Name)
4428                        .StartsWith("aarch64.sve.tuple.create2", 2)
4429                        .StartsWith("aarch64.sve.tuple.create3", 3)
4430                        .StartsWith("aarch64.sve.tuple.create4", 4)
4431                        .Default(0);
4432       assert(N > 1 && "Create is expected to be between 2-4");
4433       auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4434       Value *Ret = llvm::PoisonValue::get(RetTy);
4435       unsigned MinElts = RetTy->getMinNumElements() / N;
4436       for (unsigned I = 0; I < N; I++) {
4437         Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4438         Value *V = CI->getArgOperand(I);
4439         Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4440       }
4441       NewCall = dyn_cast<CallInst>(Ret);
4442     }
4443     break;
4444   }
4445 
4446   case Intrinsic::arm_neon_bfdot:
4447   case Intrinsic::arm_neon_bfmmla:
4448   case Intrinsic::arm_neon_bfmlalb:
4449   case Intrinsic::arm_neon_bfmlalt:
4450   case Intrinsic::aarch64_neon_bfdot:
4451   case Intrinsic::aarch64_neon_bfmmla:
4452   case Intrinsic::aarch64_neon_bfmlalb:
4453   case Intrinsic::aarch64_neon_bfmlalt: {
4454     SmallVector<Value *, 3> Args;
4455     assert(CI->arg_size() == 3 &&
4456            "Mismatch between function args and call args");
4457     size_t OperandWidth =
4458         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4459     assert((OperandWidth == 64 || OperandWidth == 128) &&
4460            "Unexpected operand width");
4461     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4462     auto Iter = CI->args().begin();
4463     Args.push_back(*Iter++);
4464     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4465     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4466     NewCall = Builder.CreateCall(NewFn, Args);
4467     break;
4468   }
4469 
4470   case Intrinsic::bitreverse:
4471     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4472     break;
4473 
4474   case Intrinsic::ctlz:
4475   case Intrinsic::cttz:
4476     assert(CI->arg_size() == 1 &&
4477            "Mismatch between function args and call args");
4478     NewCall =
4479         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4480     break;
4481 
4482   case Intrinsic::objectsize: {
4483     Value *NullIsUnknownSize =
4484         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4485     Value *Dynamic =
4486         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4487     NewCall = Builder.CreateCall(
4488         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4489     break;
4490   }
4491 
4492   case Intrinsic::ctpop:
4493     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4494     break;
4495 
4496   case Intrinsic::convert_from_fp16:
4497     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4498     break;
4499 
4500   case Intrinsic::dbg_value: {
4501     StringRef Name = F->getName();
4502     Name = Name.substr(5); // Strip llvm.
4503     // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4504     if (Name.starts_with("dbg.addr")) {
4505       DIExpression *Expr = cast<DIExpression>(
4506           cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4507       Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4508       NewCall =
4509           Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4510                                      MetadataAsValue::get(C, Expr)});
4511       break;
4512     }
4513 
4514     // Upgrade from the old version that had an extra offset argument.
4515     assert(CI->arg_size() == 4);
4516     // Drop nonzero offsets instead of attempting to upgrade them.
4517     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4518       if (Offset->isZeroValue()) {
4519         NewCall = Builder.CreateCall(
4520             NewFn,
4521             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4522         break;
4523       }
4524     CI->eraseFromParent();
4525     return;
4526   }
4527 
4528   case Intrinsic::ptr_annotation:
4529     // Upgrade from versions that lacked the annotation attribute argument.
4530     if (CI->arg_size() != 4) {
4531       DefaultCase();
4532       return;
4533     }
4534 
4535     // Create a new call with an added null annotation attribute argument.
4536     NewCall =
4537         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4538                                    CI->getArgOperand(2), CI->getArgOperand(3),
4539                                    Constant::getNullValue(Builder.getPtrTy())});
4540     NewCall->takeName(CI);
4541     CI->replaceAllUsesWith(NewCall);
4542     CI->eraseFromParent();
4543     return;
4544 
4545   case Intrinsic::var_annotation:
4546     // Upgrade from versions that lacked the annotation attribute argument.
4547     if (CI->arg_size() != 4) {
4548       DefaultCase();
4549       return;
4550     }
4551     // Create a new call with an added null annotation attribute argument.
4552     NewCall =
4553         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4554                                    CI->getArgOperand(2), CI->getArgOperand(3),
4555                                    Constant::getNullValue(Builder.getPtrTy())});
4556     NewCall->takeName(CI);
4557     CI->replaceAllUsesWith(NewCall);
4558     CI->eraseFromParent();
4559     return;
4560 
4561   case Intrinsic::riscv_aes32dsi:
4562   case Intrinsic::riscv_aes32dsmi:
4563   case Intrinsic::riscv_aes32esi:
4564   case Intrinsic::riscv_aes32esmi:
4565   case Intrinsic::riscv_sm4ks:
4566   case Intrinsic::riscv_sm4ed: {
4567     // The last argument to these intrinsics used to be i8 and changed to i32.
4568     // The type overload for sm4ks and sm4ed was removed.
4569     Value *Arg2 = CI->getArgOperand(2);
4570     if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4571       return;
4572 
4573     Value *Arg0 = CI->getArgOperand(0);
4574     Value *Arg1 = CI->getArgOperand(1);
4575     if (CI->getType()->isIntegerTy(64)) {
4576       Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4577       Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4578     }
4579 
4580     Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4581                             cast<ConstantInt>(Arg2)->getZExtValue());
4582 
4583     NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4584     Value *Res = NewCall;
4585     if (Res->getType() != CI->getType())
4586       Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4587     NewCall->takeName(CI);
4588     CI->replaceAllUsesWith(Res);
4589     CI->eraseFromParent();
4590     return;
4591   }
4592   case Intrinsic::riscv_sha256sig0:
4593   case Intrinsic::riscv_sha256sig1:
4594   case Intrinsic::riscv_sha256sum0:
4595   case Intrinsic::riscv_sha256sum1:
4596   case Intrinsic::riscv_sm3p0:
4597   case Intrinsic::riscv_sm3p1: {
4598     // The last argument to these intrinsics used to be i8 and changed to i32.
4599     // The type overload for sm4ks and sm4ed was removed.
4600     if (!CI->getType()->isIntegerTy(64))
4601       return;
4602 
4603     Value *Arg =
4604         Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4605 
4606     NewCall = Builder.CreateCall(NewFn, Arg);
4607     Value *Res =
4608         Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4609     NewCall->takeName(CI);
4610     CI->replaceAllUsesWith(Res);
4611     CI->eraseFromParent();
4612     return;
4613   }
4614 
4615   case Intrinsic::x86_xop_vfrcz_ss:
4616   case Intrinsic::x86_xop_vfrcz_sd:
4617     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4618     break;
4619 
4620   case Intrinsic::x86_xop_vpermil2pd:
4621   case Intrinsic::x86_xop_vpermil2ps:
4622   case Intrinsic::x86_xop_vpermil2pd_256:
4623   case Intrinsic::x86_xop_vpermil2ps_256: {
4624     SmallVector<Value *, 4> Args(CI->args());
4625     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4626     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4627     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4628     NewCall = Builder.CreateCall(NewFn, Args);
4629     break;
4630   }
4631 
4632   case Intrinsic::x86_sse41_ptestc:
4633   case Intrinsic::x86_sse41_ptestz:
4634   case Intrinsic::x86_sse41_ptestnzc: {
4635     // The arguments for these intrinsics used to be v4f32, and changed
4636     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4637     // So, the only thing required is a bitcast for both arguments.
4638     // First, check the arguments have the old type.
4639     Value *Arg0 = CI->getArgOperand(0);
4640     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4641       return;
4642 
4643     // Old intrinsic, add bitcasts
4644     Value *Arg1 = CI->getArgOperand(1);
4645 
4646     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4647 
4648     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4649     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4650 
4651     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4652     break;
4653   }
4654 
4655   case Intrinsic::x86_rdtscp: {
4656     // This used to take 1 arguments. If we have no arguments, it is already
4657     // upgraded.
4658     if (CI->getNumOperands() == 0)
4659       return;
4660 
4661     NewCall = Builder.CreateCall(NewFn);
4662     // Extract the second result and store it.
4663     Value *Data = Builder.CreateExtractValue(NewCall, 1);
4664     // Cast the pointer to the right type.
4665     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4666                                  llvm::PointerType::getUnqual(Data->getType()));
4667     Builder.CreateAlignedStore(Data, Ptr, Align(1));
4668     // Replace the original call result with the first result of the new call.
4669     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4670 
4671     NewCall->takeName(CI);
4672     CI->replaceAllUsesWith(TSC);
4673     CI->eraseFromParent();
4674     return;
4675   }
4676 
4677   case Intrinsic::x86_sse41_insertps:
4678   case Intrinsic::x86_sse41_dppd:
4679   case Intrinsic::x86_sse41_dpps:
4680   case Intrinsic::x86_sse41_mpsadbw:
4681   case Intrinsic::x86_avx_dp_ps_256:
4682   case Intrinsic::x86_avx2_mpsadbw: {
4683     // Need to truncate the last argument from i32 to i8 -- this argument models
4684     // an inherently 8-bit immediate operand to these x86 instructions.
4685     SmallVector<Value *, 4> Args(CI->args());
4686 
4687     // Replace the last argument with a trunc.
4688     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4689     NewCall = Builder.CreateCall(NewFn, Args);
4690     break;
4691   }
4692 
4693   case Intrinsic::x86_avx512_mask_cmp_pd_128:
4694   case Intrinsic::x86_avx512_mask_cmp_pd_256:
4695   case Intrinsic::x86_avx512_mask_cmp_pd_512:
4696   case Intrinsic::x86_avx512_mask_cmp_ps_128:
4697   case Intrinsic::x86_avx512_mask_cmp_ps_256:
4698   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4699     SmallVector<Value *, 4> Args(CI->args());
4700     unsigned NumElts =
4701         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4702     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4703 
4704     NewCall = Builder.CreateCall(NewFn, Args);
4705     Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4706 
4707     NewCall->takeName(CI);
4708     CI->replaceAllUsesWith(Res);
4709     CI->eraseFromParent();
4710     return;
4711   }
4712 
4713   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4714   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4715   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4716   case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4717   case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4718   case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4719     SmallVector<Value *, 4> Args(CI->args());
4720     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4721     if (NewFn->getIntrinsicID() ==
4722         Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4723       Args[1] = Builder.CreateBitCast(
4724           Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4725 
4726     NewCall = Builder.CreateCall(NewFn, Args);
4727     Value *Res = Builder.CreateBitCast(
4728         NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4729 
4730     NewCall->takeName(CI);
4731     CI->replaceAllUsesWith(Res);
4732     CI->eraseFromParent();
4733     return;
4734   }
4735   case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4736   case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4737   case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4738     SmallVector<Value *, 4> Args(CI->args());
4739     unsigned NumElts =
4740         cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4741     Args[1] = Builder.CreateBitCast(
4742         Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4743     Args[2] = Builder.CreateBitCast(
4744         Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4745 
4746     NewCall = Builder.CreateCall(NewFn, Args);
4747     break;
4748   }
4749 
4750   case Intrinsic::thread_pointer: {
4751     NewCall = Builder.CreateCall(NewFn, {});
4752     break;
4753   }
4754 
4755   case Intrinsic::memcpy:
4756   case Intrinsic::memmove:
4757   case Intrinsic::memset: {
4758     // We have to make sure that the call signature is what we're expecting.
4759     // We only want to change the old signatures by removing the alignment arg:
4760     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4761     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4762     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4763     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4764     // Note: i8*'s in the above can be any pointer type
4765     if (CI->arg_size() != 5) {
4766       DefaultCase();
4767       return;
4768     }
4769     // Remove alignment argument (3), and add alignment attributes to the
4770     // dest/src pointers.
4771     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4772                       CI->getArgOperand(2), CI->getArgOperand(4)};
4773     NewCall = Builder.CreateCall(NewFn, Args);
4774     AttributeList OldAttrs = CI->getAttributes();
4775     AttributeList NewAttrs = AttributeList::get(
4776         C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4777         {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4778          OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4779     NewCall->setAttributes(NewAttrs);
4780     auto *MemCI = cast<MemIntrinsic>(NewCall);
4781     // All mem intrinsics support dest alignment.
4782     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4783     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4784     // Memcpy/Memmove also support source alignment.
4785     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4786       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4787     break;
4788   }
4789   }
4790   assert(NewCall && "Should have either set this variable or returned through "
4791                     "the default case");
4792   NewCall->takeName(CI);
4793   CI->replaceAllUsesWith(NewCall);
4794   CI->eraseFromParent();
4795 }
4796 
UpgradeCallsToIntrinsic(Function * F)4797 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4798   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4799 
4800   // Check if this function should be upgraded and get the replacement function
4801   // if there is one.
4802   Function *NewFn;
4803   if (UpgradeIntrinsicFunction(F, NewFn)) {
4804     // Replace all users of the old function with the new function or new
4805     // instructions. This is not a range loop because the call is deleted.
4806     for (User *U : make_early_inc_range(F->users()))
4807       if (CallBase *CB = dyn_cast<CallBase>(U))
4808         UpgradeIntrinsicCall(CB, NewFn);
4809 
4810     // Remove old function, no longer used, from the module.
4811     F->eraseFromParent();
4812   }
4813 }
4814 
UpgradeTBAANode(MDNode & MD)4815 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4816   const unsigned NumOperands = MD.getNumOperands();
4817   if (NumOperands == 0)
4818     return &MD; // Invalid, punt to a verifier error.
4819 
4820   // Check if the tag uses struct-path aware TBAA format.
4821   if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4822     return &MD;
4823 
4824   auto &Context = MD.getContext();
4825   if (NumOperands == 3) {
4826     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4827     MDNode *ScalarType = MDNode::get(Context, Elts);
4828     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4829     Metadata *Elts2[] = {ScalarType, ScalarType,
4830                          ConstantAsMetadata::get(
4831                              Constant::getNullValue(Type::getInt64Ty(Context))),
4832                          MD.getOperand(2)};
4833     return MDNode::get(Context, Elts2);
4834   }
4835   // Create a MDNode <MD, MD, offset 0>
4836   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4837                                     Type::getInt64Ty(Context)))};
4838   return MDNode::get(Context, Elts);
4839 }
4840 
UpgradeBitCastInst(unsigned Opc,Value * V,Type * DestTy,Instruction * & Temp)4841 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4842                                       Instruction *&Temp) {
4843   if (Opc != Instruction::BitCast)
4844     return nullptr;
4845 
4846   Temp = nullptr;
4847   Type *SrcTy = V->getType();
4848   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4849       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4850     LLVMContext &Context = V->getContext();
4851 
4852     // We have no information about target data layout, so we assume that
4853     // the maximum pointer size is 64bit.
4854     Type *MidTy = Type::getInt64Ty(Context);
4855     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4856 
4857     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4858   }
4859 
4860   return nullptr;
4861 }
4862 
UpgradeBitCastExpr(unsigned Opc,Constant * C,Type * DestTy)4863 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4864   if (Opc != Instruction::BitCast)
4865     return nullptr;
4866 
4867   Type *SrcTy = C->getType();
4868   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4869       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4870     LLVMContext &Context = C->getContext();
4871 
4872     // We have no information about target data layout, so we assume that
4873     // the maximum pointer size is 64bit.
4874     Type *MidTy = Type::getInt64Ty(Context);
4875 
4876     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4877                                      DestTy);
4878   }
4879 
4880   return nullptr;
4881 }
4882 
4883 /// Check the debug info version number, if it is out-dated, drop the debug
4884 /// info. Return true if module is modified.
UpgradeDebugInfo(Module & M)4885 bool llvm::UpgradeDebugInfo(Module &M) {
4886   if (DisableAutoUpgradeDebugInfo)
4887     return false;
4888 
4889   unsigned Version = getDebugMetadataVersionFromModule(M);
4890   if (Version == DEBUG_METADATA_VERSION) {
4891     bool BrokenDebugInfo = false;
4892     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4893       report_fatal_error("Broken module found, compilation aborted!");
4894     if (!BrokenDebugInfo)
4895       // Everything is ok.
4896       return false;
4897     else {
4898       // Diagnose malformed debug info.
4899       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4900       M.getContext().diagnose(Diag);
4901     }
4902   }
4903   bool Modified = StripDebugInfo(M);
4904   if (Modified && Version != DEBUG_METADATA_VERSION) {
4905     // Diagnose a version mismatch.
4906     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4907     M.getContext().diagnose(DiagVersion);
4908   }
4909   return Modified;
4910 }
4911 
4912 /// This checks for objc retain release marker which should be upgraded. It
4913 /// returns true if module is modified.
upgradeRetainReleaseMarker(Module & M)4914 static bool upgradeRetainReleaseMarker(Module &M) {
4915   bool Changed = false;
4916   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4917   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4918   if (ModRetainReleaseMarker) {
4919     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4920     if (Op) {
4921       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4922       if (ID) {
4923         SmallVector<StringRef, 4> ValueComp;
4924         ID->getString().split(ValueComp, "#");
4925         if (ValueComp.size() == 2) {
4926           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4927           ID = MDString::get(M.getContext(), NewValue);
4928         }
4929         M.addModuleFlag(Module::Error, MarkerKey, ID);
4930         M.eraseNamedMetadata(ModRetainReleaseMarker);
4931         Changed = true;
4932       }
4933     }
4934   }
4935   return Changed;
4936 }
4937 
UpgradeARCRuntime(Module & M)4938 void llvm::UpgradeARCRuntime(Module &M) {
4939   // This lambda converts normal function calls to ARC runtime functions to
4940   // intrinsic calls.
4941   auto UpgradeToIntrinsic = [&](const char *OldFunc,
4942                                 llvm::Intrinsic::ID IntrinsicFunc) {
4943     Function *Fn = M.getFunction(OldFunc);
4944 
4945     if (!Fn)
4946       return;
4947 
4948     Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4949 
4950     for (User *U : make_early_inc_range(Fn->users())) {
4951       CallInst *CI = dyn_cast<CallInst>(U);
4952       if (!CI || CI->getCalledFunction() != Fn)
4953         continue;
4954 
4955       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4956       FunctionType *NewFuncTy = NewFn->getFunctionType();
4957       SmallVector<Value *, 2> Args;
4958 
4959       // Don't upgrade the intrinsic if it's not valid to bitcast the return
4960       // value to the return type of the old function.
4961       if (NewFuncTy->getReturnType() != CI->getType() &&
4962           !CastInst::castIsValid(Instruction::BitCast, CI,
4963                                  NewFuncTy->getReturnType()))
4964         continue;
4965 
4966       bool InvalidCast = false;
4967 
4968       for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4969         Value *Arg = CI->getArgOperand(I);
4970 
4971         // Bitcast argument to the parameter type of the new function if it's
4972         // not a variadic argument.
4973         if (I < NewFuncTy->getNumParams()) {
4974           // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4975           // to the parameter type of the new function.
4976           if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4977                                      NewFuncTy->getParamType(I))) {
4978             InvalidCast = true;
4979             break;
4980           }
4981           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4982         }
4983         Args.push_back(Arg);
4984       }
4985 
4986       if (InvalidCast)
4987         continue;
4988 
4989       // Create a call instruction that calls the new function.
4990       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4991       NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4992       NewCall->takeName(CI);
4993 
4994       // Bitcast the return value back to the type of the old call.
4995       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4996 
4997       if (!CI->use_empty())
4998         CI->replaceAllUsesWith(NewRetVal);
4999       CI->eraseFromParent();
5000     }
5001 
5002     if (Fn->use_empty())
5003       Fn->eraseFromParent();
5004   };
5005 
5006   // Unconditionally convert a call to "clang.arc.use" to a call to
5007   // "llvm.objc.clang.arc.use".
5008   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5009 
5010   // Upgrade the retain release marker. If there is no need to upgrade
5011   // the marker, that means either the module is already new enough to contain
5012   // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5013   if (!upgradeRetainReleaseMarker(M))
5014     return;
5015 
5016   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5017       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5018       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5019       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5020       {"objc_autoreleaseReturnValue",
5021        llvm::Intrinsic::objc_autoreleaseReturnValue},
5022       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5023       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5024       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5025       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5026       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5027       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5028       {"objc_release", llvm::Intrinsic::objc_release},
5029       {"objc_retain", llvm::Intrinsic::objc_retain},
5030       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5031       {"objc_retainAutoreleaseReturnValue",
5032        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5033       {"objc_retainAutoreleasedReturnValue",
5034        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5035       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5036       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5037       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5038       {"objc_unsafeClaimAutoreleasedReturnValue",
5039        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5040       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5041       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5042       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5043       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5044       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5045       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5046       {"objc_arc_annotation_topdown_bbstart",
5047        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5048       {"objc_arc_annotation_topdown_bbend",
5049        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5050       {"objc_arc_annotation_bottomup_bbstart",
5051        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5052       {"objc_arc_annotation_bottomup_bbend",
5053        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5054 
5055   for (auto &I : RuntimeFuncs)
5056     UpgradeToIntrinsic(I.first, I.second);
5057 }
5058 
UpgradeModuleFlags(Module & M)5059 bool llvm::UpgradeModuleFlags(Module &M) {
5060   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5061   if (!ModFlags)
5062     return false;
5063 
5064   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5065   bool HasSwiftVersionFlag = false;
5066   uint8_t SwiftMajorVersion, SwiftMinorVersion;
5067   uint32_t SwiftABIVersion;
5068   auto Int8Ty = Type::getInt8Ty(M.getContext());
5069   auto Int32Ty = Type::getInt32Ty(M.getContext());
5070 
5071   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5072     MDNode *Op = ModFlags->getOperand(I);
5073     if (Op->getNumOperands() != 3)
5074       continue;
5075     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5076     if (!ID)
5077       continue;
5078     auto SetBehavior = [&](Module::ModFlagBehavior B) {
5079       Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5080                               Type::getInt32Ty(M.getContext()), B)),
5081                           MDString::get(M.getContext(), ID->getString()),
5082                           Op->getOperand(2)};
5083       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5084       Changed = true;
5085     };
5086 
5087     if (ID->getString() == "Objective-C Image Info Version")
5088       HasObjCFlag = true;
5089     if (ID->getString() == "Objective-C Class Properties")
5090       HasClassProperties = true;
5091     // Upgrade PIC from Error/Max to Min.
5092     if (ID->getString() == "PIC Level") {
5093       if (auto *Behavior =
5094               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5095         uint64_t V = Behavior->getLimitedValue();
5096         if (V == Module::Error || V == Module::Max)
5097           SetBehavior(Module::Min);
5098       }
5099     }
5100     // Upgrade "PIE Level" from Error to Max.
5101     if (ID->getString() == "PIE Level")
5102       if (auto *Behavior =
5103               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5104         if (Behavior->getLimitedValue() == Module::Error)
5105           SetBehavior(Module::Max);
5106 
5107     // Upgrade branch protection and return address signing module flags. The
5108     // module flag behavior for these fields were Error and now they are Min.
5109     if (ID->getString() == "branch-target-enforcement" ||
5110         ID->getString().starts_with("sign-return-address")) {
5111       if (auto *Behavior =
5112               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5113         if (Behavior->getLimitedValue() == Module::Error) {
5114           Type *Int32Ty = Type::getInt32Ty(M.getContext());
5115           Metadata *Ops[3] = {
5116               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5117               Op->getOperand(1), Op->getOperand(2)};
5118           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5119           Changed = true;
5120         }
5121       }
5122     }
5123 
5124     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
5125     // section name so that llvm-lto will not complain about mismatching
5126     // module flags that is functionally the same.
5127     if (ID->getString() == "Objective-C Image Info Section") {
5128       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5129         SmallVector<StringRef, 4> ValueComp;
5130         Value->getString().split(ValueComp, " ");
5131         if (ValueComp.size() != 1) {
5132           std::string NewValue;
5133           for (auto &S : ValueComp)
5134             NewValue += S.str();
5135           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5136                               MDString::get(M.getContext(), NewValue)};
5137           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5138           Changed = true;
5139         }
5140       }
5141     }
5142 
5143     // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5144     // If the higher bits are set, it adds new module flag for swift info.
5145     if (ID->getString() == "Objective-C Garbage Collection") {
5146       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5147       if (Md) {
5148         assert(Md->getValue() && "Expected non-empty metadata");
5149         auto Type = Md->getValue()->getType();
5150         if (Type == Int8Ty)
5151           continue;
5152         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5153         if ((Val & 0xff) != Val) {
5154           HasSwiftVersionFlag = true;
5155           SwiftABIVersion = (Val & 0xff00) >> 8;
5156           SwiftMajorVersion = (Val & 0xff000000) >> 24;
5157           SwiftMinorVersion = (Val & 0xff0000) >> 16;
5158         }
5159         Metadata *Ops[3] = {
5160           ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5161           Op->getOperand(1),
5162           ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5163         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5164         Changed = true;
5165       }
5166     }
5167 
5168     if (ID->getString() == "amdgpu_code_object_version") {
5169       Metadata *Ops[3] = {
5170           Op->getOperand(0),
5171           MDString::get(M.getContext(), "amdhsa_code_object_version"),
5172           Op->getOperand(2)};
5173       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5174       Changed = true;
5175     }
5176   }
5177 
5178   // "Objective-C Class Properties" is recently added for Objective-C. We
5179   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5180   // flag of value 0, so we can correclty downgrade this flag when trying to
5181   // link an ObjC bitcode without this module flag with an ObjC bitcode with
5182   // this module flag.
5183   if (HasObjCFlag && !HasClassProperties) {
5184     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5185                     (uint32_t)0);
5186     Changed = true;
5187   }
5188 
5189   if (HasSwiftVersionFlag) {
5190     M.addModuleFlag(Module::Error, "Swift ABI Version",
5191                     SwiftABIVersion);
5192     M.addModuleFlag(Module::Error, "Swift Major Version",
5193                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
5194     M.addModuleFlag(Module::Error, "Swift Minor Version",
5195                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
5196     Changed = true;
5197   }
5198 
5199   return Changed;
5200 }
5201 
UpgradeSectionAttributes(Module & M)5202 void llvm::UpgradeSectionAttributes(Module &M) {
5203   auto TrimSpaces = [](StringRef Section) -> std::string {
5204     SmallVector<StringRef, 5> Components;
5205     Section.split(Components, ',');
5206 
5207     SmallString<32> Buffer;
5208     raw_svector_ostream OS(Buffer);
5209 
5210     for (auto Component : Components)
5211       OS << ',' << Component.trim();
5212 
5213     return std::string(OS.str().substr(1));
5214   };
5215 
5216   for (auto &GV : M.globals()) {
5217     if (!GV.hasSection())
5218       continue;
5219 
5220     StringRef Section = GV.getSection();
5221 
5222     if (!Section.starts_with("__DATA, __objc_catlist"))
5223       continue;
5224 
5225     // __DATA, __objc_catlist, regular, no_dead_strip
5226     // __DATA,__objc_catlist,regular,no_dead_strip
5227     GV.setSection(TrimSpaces(Section));
5228   }
5229 }
5230 
5231 namespace {
5232 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5233 // callsites within a function that did not also have the strictfp attribute.
5234 // Since 10.0, if strict FP semantics are needed within a function, the
5235 // function must have the strictfp attribute and all calls within the function
5236 // must also have the strictfp attribute. This latter restriction is
5237 // necessary to prevent unwanted libcall simplification when a function is
5238 // being cloned (such as for inlining).
5239 //
5240 // The "dangling" strictfp attribute usage was only used to prevent constant
5241 // folding and other libcall simplification. The nobuiltin attribute on the
5242 // callsite has the same effect.
5243 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5244   StrictFPUpgradeVisitor() = default;
5245 
visitCallBase__anonaa1e81db0511::StrictFPUpgradeVisitor5246   void visitCallBase(CallBase &Call) {
5247     if (!Call.isStrictFP())
5248       return;
5249     if (isa<ConstrainedFPIntrinsic>(&Call))
5250       return;
5251     // If we get here, the caller doesn't have the strictfp attribute
5252     // but this callsite does. Replace the strictfp attribute with nobuiltin.
5253     Call.removeFnAttr(Attribute::StrictFP);
5254     Call.addFnAttr(Attribute::NoBuiltin);
5255   }
5256 };
5257 } // namespace
5258 
UpgradeFunctionAttributes(Function & F)5259 void llvm::UpgradeFunctionAttributes(Function &F) {
5260   // If a function definition doesn't have the strictfp attribute,
5261   // convert any callsite strictfp attributes to nobuiltin.
5262   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5263     StrictFPUpgradeVisitor SFPV;
5264     SFPV.visit(F);
5265   }
5266 
5267   // Remove all incompatibile attributes from function.
5268   F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5269   for (auto &Arg : F.args())
5270     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5271 
5272   // Older versions of LLVM treated an "implicit-section-name" attribute
5273   // similarly to directly setting the section on a Function.
5274   if (Attribute A = F.getFnAttribute("implicit-section-name");
5275       A.isValid() && A.isStringAttribute()) {
5276     F.setSection(A.getValueAsString());
5277     F.removeFnAttr("implicit-section-name");
5278   }
5279 }
5280 
isOldLoopArgument(Metadata * MD)5281 static bool isOldLoopArgument(Metadata *MD) {
5282   auto *T = dyn_cast_or_null<MDTuple>(MD);
5283   if (!T)
5284     return false;
5285   if (T->getNumOperands() < 1)
5286     return false;
5287   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5288   if (!S)
5289     return false;
5290   return S->getString().starts_with("llvm.vectorizer.");
5291 }
5292 
upgradeLoopTag(LLVMContext & C,StringRef OldTag)5293 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5294   StringRef OldPrefix = "llvm.vectorizer.";
5295   assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5296 
5297   if (OldTag == "llvm.vectorizer.unroll")
5298     return MDString::get(C, "llvm.loop.interleave.count");
5299 
5300   return MDString::get(
5301       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5302              .str());
5303 }
5304 
upgradeLoopArgument(Metadata * MD)5305 static Metadata *upgradeLoopArgument(Metadata *MD) {
5306   auto *T = dyn_cast_or_null<MDTuple>(MD);
5307   if (!T)
5308     return MD;
5309   if (T->getNumOperands() < 1)
5310     return MD;
5311   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5312   if (!OldTag)
5313     return MD;
5314   if (!OldTag->getString().starts_with("llvm.vectorizer."))
5315     return MD;
5316 
5317   // This has an old tag.  Upgrade it.
5318   SmallVector<Metadata *, 8> Ops;
5319   Ops.reserve(T->getNumOperands());
5320   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5321   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5322     Ops.push_back(T->getOperand(I));
5323 
5324   return MDTuple::get(T->getContext(), Ops);
5325 }
5326 
upgradeInstructionLoopAttachment(MDNode & N)5327 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5328   auto *T = dyn_cast<MDTuple>(&N);
5329   if (!T)
5330     return &N;
5331 
5332   if (none_of(T->operands(), isOldLoopArgument))
5333     return &N;
5334 
5335   SmallVector<Metadata *, 8> Ops;
5336   Ops.reserve(T->getNumOperands());
5337   for (Metadata *MD : T->operands())
5338     Ops.push_back(upgradeLoopArgument(MD));
5339 
5340   return MDTuple::get(T->getContext(), Ops);
5341 }
5342 
UpgradeDataLayoutString(StringRef DL,StringRef TT)5343 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5344   Triple T(TT);
5345   // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5346   // the address space of globals to 1. This does not apply to SPIRV Logical.
5347   if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5348        (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5349       !DL.contains("-G") && !DL.starts_with("G")) {
5350     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5351   }
5352 
5353   if (T.isLoongArch64() || T.isRISCV64()) {
5354     // Make i32 a native type for 64-bit LoongArch and RISC-V.
5355     auto I = DL.find("-n64-");
5356     if (I != StringRef::npos)
5357       return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5358     return DL.str();
5359   }
5360 
5361   std::string Res = DL.str();
5362   // AMDGCN data layout upgrades.
5363   if (T.isAMDGCN()) {
5364     // Define address spaces for constants.
5365     if (!DL.contains("-G") && !DL.starts_with("G"))
5366       Res.append(Res.empty() ? "G1" : "-G1");
5367 
5368     // Add missing non-integral declarations.
5369     // This goes before adding new address spaces to prevent incoherent string
5370     // values.
5371     if (!DL.contains("-ni") && !DL.starts_with("ni"))
5372       Res.append("-ni:7:8:9");
5373     // Update ni:7 to ni:7:8:9.
5374     if (DL.ends_with("ni:7"))
5375       Res.append(":8:9");
5376     if (DL.ends_with("ni:7:8"))
5377       Res.append(":9");
5378 
5379     // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5380     // resources) An empty data layout has already been upgraded to G1 by now.
5381     if (!DL.contains("-p7") && !DL.starts_with("p7"))
5382       Res.append("-p7:160:256:256:32");
5383     if (!DL.contains("-p8") && !DL.starts_with("p8"))
5384       Res.append("-p8:128:128");
5385     if (!DL.contains("-p9") && !DL.starts_with("p9"))
5386       Res.append("-p9:192:256:256:32");
5387 
5388     return Res;
5389   }
5390 
5391   // AArch64 data layout upgrades.
5392   if (T.isAArch64()) {
5393     // Add "-Fn32"
5394     if (!DL.empty() && !DL.contains("-Fn32"))
5395       Res.append("-Fn32");
5396     return Res;
5397   }
5398 
5399   if (!T.isX86())
5400     return Res;
5401 
5402   // If the datalayout matches the expected format, add pointer size address
5403   // spaces to the datalayout.
5404   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5405   if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5406     SmallVector<StringRef, 4> Groups;
5407     Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5408     if (R.match(Res, &Groups))
5409       Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5410   }
5411 
5412   // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5413   // for i128 operations prior to this being reflected in the data layout, and
5414   // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5415   // boundaries, so although this is a breaking change, the upgrade is expected
5416   // to fix more IR than it breaks.
5417   // Intel MCU is an exception and uses 4-byte-alignment.
5418   if (!T.isOSIAMCU()) {
5419     std::string I128 = "-i128:128";
5420     if (StringRef Ref = Res; !Ref.contains(I128)) {
5421       SmallVector<StringRef, 4> Groups;
5422       Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5423       if (R.match(Res, &Groups))
5424         Res = (Groups[1] + I128 + Groups[3]).str();
5425     }
5426   }
5427 
5428   // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5429   // Raising the alignment is safe because Clang did not produce f80 values in
5430   // the MSVC environment before this upgrade was added.
5431   if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5432     StringRef Ref = Res;
5433     auto I = Ref.find("-f80:32-");
5434     if (I != StringRef::npos)
5435       Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5436   }
5437 
5438   return Res;
5439 }
5440 
UpgradeAttributes(AttrBuilder & B)5441 void llvm::UpgradeAttributes(AttrBuilder &B) {
5442   StringRef FramePointer;
5443   Attribute A = B.getAttribute("no-frame-pointer-elim");
5444   if (A.isValid()) {
5445     // The value can be "true" or "false".
5446     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5447     B.removeAttribute("no-frame-pointer-elim");
5448   }
5449   if (B.contains("no-frame-pointer-elim-non-leaf")) {
5450     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5451     if (FramePointer != "all")
5452       FramePointer = "non-leaf";
5453     B.removeAttribute("no-frame-pointer-elim-non-leaf");
5454   }
5455   if (!FramePointer.empty())
5456     B.addAttribute("frame-pointer", FramePointer);
5457 
5458   A = B.getAttribute("null-pointer-is-valid");
5459   if (A.isValid()) {
5460     // The value can be "true" or "false".
5461     bool NullPointerIsValid = A.getValueAsString() == "true";
5462     B.removeAttribute("null-pointer-is-valid");
5463     if (NullPointerIsValid)
5464       B.addAttribute(Attribute::NullPointerIsValid);
5465   }
5466 }
5467 
UpgradeOperandBundles(std::vector<OperandBundleDef> & Bundles)5468 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5469   // clang.arc.attachedcall bundles are now required to have an operand.
5470   // If they don't, it's okay to drop them entirely: when there is an operand,
5471   // the "attachedcall" is meaningful and required, but without an operand,
5472   // it's just a marker NOP.  Dropping it merely prevents an optimization.
5473   erase_if(Bundles, [&](OperandBundleDef &OBD) {
5474     return OBD.getTag() == "clang.arc.attachedcall" &&
5475            OBD.inputs().empty();
5476   });
5477 }
5478