xref: /freebsd/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/BinaryFormat/Dwarf.h"
21 #include "llvm/IR/AttributeMask.h"
22 #include "llvm/IR/Attributes.h"
23 #include "llvm/IR/CallingConv.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/DebugInfo.h"
26 #include "llvm/IR/DebugInfoMetadata.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstVisitor.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Intrinsics.h"
34 #include "llvm/IR/IntrinsicsAArch64.h"
35 #include "llvm/IR/IntrinsicsARM.h"
36 #include "llvm/IR/IntrinsicsNVPTX.h"
37 #include "llvm/IR/IntrinsicsRISCV.h"
38 #include "llvm/IR/IntrinsicsWebAssembly.h"
39 #include "llvm/IR/IntrinsicsX86.h"
40 #include "llvm/IR/LLVMContext.h"
41 #include "llvm/IR/MDBuilder.h"
42 #include "llvm/IR/Metadata.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/IR/Verifier.h"
46 #include "llvm/Support/AMDGPUAddrSpace.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/NVPTXAddrSpace.h"
50 #include "llvm/Support/Regex.h"
51 #include "llvm/TargetParser/Triple.h"
52 #include <cstdint>
53 #include <cstring>
54 #include <numeric>
55 
56 using namespace llvm;
57 
// Command-line escape hatch: when set, UpgradeDebugInfo() will not attempt to
// rewrite out-of-date debug-info metadata in modules being loaded.
static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
61 
rename(GlobalValue * GV)62 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
63 
64 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
65 // changed their type from v4f32 to v2i64.
upgradePTESTIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)66 static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
67                                   Function *&NewFn) {
68   // Check whether this is an old version of the function, which received
69   // v4f32 arguments.
70   Type *Arg0Type = F->getFunctionType()->getParamType(0);
71   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
72     return false;
73 
74   // Yes, it's old, replace it with new version.
75   rename(F);
76   NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
77   return true;
78 }
79 
80 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
81 // arguments have changed their type from i32 to i8.
upgradeX86IntrinsicsWith8BitMask(Function * F,Intrinsic::ID IID,Function * & NewFn)82 static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
83                                              Function *&NewFn) {
84   // Check that the last argument is an i32.
85   Type *LastArgType = F->getFunctionType()->getParamType(
86      F->getFunctionType()->getNumParams() - 1);
87   if (!LastArgType->isIntegerTy(32))
88     return false;
89 
90   // Move this function aside and map down.
91   rename(F);
92   NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
93   return true;
94 }
95 
96 // Upgrade the declaration of fp compare intrinsics that change return type
97 // from scalar to vXi1 mask.
upgradeX86MaskedFPCompare(Function * F,Intrinsic::ID IID,Function * & NewFn)98 static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
99                                       Function *&NewFn) {
100   // Check if the return type is a vector.
101   if (F->getReturnType()->isVectorTy())
102     return false;
103 
104   rename(F);
105   NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
106   return true;
107 }
108 
upgradeX86BF16Intrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)109 static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
110                                     Function *&NewFn) {
111   if (F->getReturnType()->getScalarType()->isBFloatTy())
112     return false;
113 
114   rename(F);
115   NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
116   return true;
117 }
118 
upgradeX86BF16DPIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)119 static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
120                                       Function *&NewFn) {
121   if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
122     return false;
123 
124   rename(F);
125   NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
126   return true;
127 }
128 
// Predicate over legacy x86 intrinsic names.  \p Name is the intrinsic name
// with the leading "x86." already stripped by the caller; the prefix-consuming
// checks below peel off further family prefixes ("avx.", "avx512.mask.", ...)
// and match the remainder.  Returns true when the name belongs to a removed /
// renamed intrinsic that the auto-upgrader knows how to rewrite.  The order of
// the consume_front() groups matters: e.g. "avx512." must be tested after
// "avx2." has failed, and "avx512.mask3." only after "avx512.mask." failed.
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||        // Added in 3.9
              Name.starts_with("andn.") ||       // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||           // Added in 5.0
              Name.starts_with("cmp.d") ||           // Added in 5.0
              Name.starts_with("cmp.q") ||           // Added in 5.0
              Name.starts_with("cmp.w") ||           // Added in 5.0
              Name.starts_with("compress.b") ||      // Added in 9.0
              Name.starts_with("compress.d") ||      // Added in 9.0
              Name.starts_with("compress.p") ||      // Added in 9.0
              Name.starts_with("compress.q") ||      // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||      // Added in 9.0
              Name.starts_with("conflict.") ||       // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||              // Added in 7.0
              Name == "cvtpd2ps.256" ||              // Added in 7.0
              Name == "cvtps2pd.128" ||              // Added in 7.0
              Name == "cvtps2pd.256" ||              // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||              // Added in 9.0
              Name == "cvtqq2ps.512" ||              // Added in 9.0
              Name == "cvttpd2dq.256" ||             // Added in 7.0
              Name == "cvttps2dq.128" ||             // Added in 7.0
              Name == "cvttps2dq.256" ||             // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||             // Added in 9.0
              Name == "cvtuqq2ps.512" ||             // Added in 9.0
              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
              Name.starts_with("div.p") ||    // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") ||     // Added in 9.0
              Name.starts_with("expand.q") ||     // Added in 9.0
              Name.starts_with("expand.w") ||     // Added in 9.0
              Name.starts_with("fpclass.p") ||    // Added in 7.0
              Name.starts_with("insert") ||       // Added in 4.0
              Name.starts_with("load.") ||        // Added in 3.9
              Name.starts_with("loadu.") ||       // Added in 3.9
              Name.starts_with("lzcnt.") ||       // Added in 5.0
              Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||     // Added in 3.9
              Name.starts_with("move.s") ||      // Added in 4.0
              Name.starts_with("movshdup") ||    // Added in 3.9
              Name.starts_with("movsldup") ||    // Added in 3.9
              Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||         // Added in 3.9
              Name.starts_with("pabs.") ||       // Added in 6.0
              Name.starts_with("packssdw.") ||   // Added in 5.0
              Name.starts_with("packsswb.") ||   // Added in 5.0
              Name.starts_with("packusdw.") ||   // Added in 5.0
              Name.starts_with("packuswb.") ||   // Added in 5.0
              Name.starts_with("padd.") ||       // Added in 4.0
              Name.starts_with("padds.") ||      // Added in 8.0
              Name.starts_with("paddus.") ||     // Added in 8.0
              Name.starts_with("palignr.") ||    // Added in 3.9
              Name.starts_with("pand.") ||       // Added in 3.9
              Name.starts_with("pandn.") ||      // Added in 3.9
              Name.starts_with("pavg") ||        // Added in 6.0
              Name.starts_with("pbroadcast") ||  // Added in 6.0
              Name.starts_with("pcmpeq.") ||     // Added in 3.9
              Name.starts_with("pcmpgt.") ||     // Added in 3.9
              Name.starts_with("perm.df.") ||    // Added in 3.9
              Name.starts_with("perm.di.") ||    // Added in 3.9
              Name.starts_with("permvar.") ||    // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") ||   // Added in 7.0
              Name.starts_with("pmax") ||        // Added in 4.0
              Name.starts_with("pmin") ||        // Added in 4.0
              Name == "pmov.qd.256" ||           // Added in 9.0
              Name == "pmov.qd.512" ||           // Added in 9.0
              Name == "pmov.wb.256" ||           // Added in 9.0
              Name == "pmov.wb.512" ||           // Added in 9.0
              Name.starts_with("pmovsx") ||      // Added in 4.0
              Name.starts_with("pmovzx") ||      // Added in 4.0
              Name.starts_with("pmul.dq.") ||    // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") ||    // Added in 7.0
              Name.starts_with("pmulhu.w.") ||   // Added in 7.0
              Name.starts_with("pmull.") ||      // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
              Name.starts_with("por.") ||            // Added in 3.9
              Name.starts_with("prol.") ||           // Added in 8.0
              Name.starts_with("prolv.") ||          // Added in 8.0
              Name.starts_with("pror.") ||           // Added in 8.0
              Name.starts_with("prorv.") ||          // Added in 8.0
              Name.starts_with("pshuf.b.") ||        // Added in 4.0
              Name.starts_with("pshuf.d.") ||        // Added in 3.9
              Name.starts_with("pshufh.w.") ||       // Added in 3.9
              Name.starts_with("pshufl.w.") ||       // Added in 3.9
              Name.starts_with("psll.d") ||          // Added in 4.0
              Name.starts_with("psll.q") ||          // Added in 4.0
              Name.starts_with("psll.w") ||          // Added in 4.0
              Name.starts_with("pslli") ||           // Added in 4.0
              Name.starts_with("psllv") ||           // Added in 4.0
              Name.starts_with("psra.d") ||          // Added in 4.0
              Name.starts_with("psra.q") ||          // Added in 4.0
              Name.starts_with("psra.w") ||          // Added in 4.0
              Name.starts_with("psrai") ||           // Added in 4.0
              Name.starts_with("psrav") ||           // Added in 4.0
              Name.starts_with("psrl.d") ||          // Added in 4.0
              Name.starts_with("psrl.q") ||          // Added in 4.0
              Name.starts_with("psrl.w") ||          // Added in 4.0
              Name.starts_with("psrli") ||           // Added in 4.0
              Name.starts_with("psrlv") ||           // Added in 4.0
              Name.starts_with("psub.") ||           // Added in 4.0
              Name.starts_with("psubs.") ||          // Added in 8.0
              Name.starts_with("psubus.") ||         // Added in 8.0
              Name.starts_with("pternlog.") ||       // Added in 7.0
              Name.starts_with("punpckh") ||         // Added in 3.9
              Name.starts_with("punpckl") ||         // Added in 3.9
              Name.starts_with("pxor.") ||           // Added in 3.9
              Name.starts_with("shuf.f") ||          // Added in 6.0
              Name.starts_with("shuf.i") ||          // Added in 6.0
              Name.starts_with("shuf.p") ||          // Added in 4.0
              Name.starts_with("sqrt.p") ||          // Added in 7.0
              Name.starts_with("store.b.") ||        // Added in 3.9
              Name.starts_with("store.d.") ||        // Added in 3.9
              Name.starts_with("store.p") ||         // Added in 3.9
              Name.starts_with("store.q.") ||        // Added in 3.9
              Name.starts_with("store.w.") ||        // Added in 3.9
              Name == "store.ss" ||                  // Added in 7.0
              Name.starts_with("storeu.") ||         // Added in 3.9
              Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||       // Added in 5.0
              Name.starts_with("unpckh.") ||     // Added in 3.9
              Name.starts_with("unpckl.") ||     // Added in 3.9
              Name.starts_with("valign.") ||     // Added in 4.0
              Name == "vcvtph2ps.128" ||         // Added in 11.0
              Name == "vcvtph2ps.256" ||         // Added in 11.0
              Name.starts_with("vextract") ||    // Added in 4.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vfnmadd.") ||    // Added in 7.0
              Name.starts_with("vfnmsub.") ||    // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") ||   // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshld.") ||     // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrd.") ||     // Added in 7.0
              Name.starts_with("vpshrdv.") ||    // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor."));           // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||               // Added in 5.0
            Name == "pmul.dq.512" ||            // Added in 7.0
            Name == "pmulu.dq.512" ||           // Added in 7.0
            Name.starts_with("broadcastm") ||   // Added in 6.0
            Name.starts_with("cmp.p") ||        // Added in 12.0
            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
            Name.starts_with("cvtmask2") ||     // Added in 5.0
            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
            Name == "cvtusi2sd" ||              // Added in 7.0
            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
            Name == "kand.w" ||                 // Added in 7.0
            Name == "kandn.w" ||                // Added in 7.0
            Name == "knot.w" ||                 // Added in 7.0
            Name == "kor.w" ||                  // Added in 7.0
            Name == "kortestc.w" ||             // Added in 7.0
            Name == "kortestz.w" ||             // Added in 7.0
            Name.starts_with("kunpck") ||       // added in 6.0
            Name == "kxnor.w" ||                // Added in 7.0
            Name == "kxor.w" ||                 // Added in 7.0
            Name.starts_with("padds.") ||       // Added in 8.0
            Name.starts_with("pbroadcast") ||   // Added in 3.9
            Name.starts_with("prol") ||         // Added in 8.0
            Name.starts_with("pror") ||         // Added in 8.0
            Name.starts_with("psll.dq") ||      // Added in 3.9
            Name.starts_with("psrl.dq") ||      // Added in 3.9
            Name.starts_with("psubs.") ||       // Added in 8.0
            Name.starts_with("ptestm") ||       // Added in 6.0
            Name.starts_with("ptestnm") ||      // Added in 6.0
            Name.starts_with("storent.") ||     // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") ||      // Added in 8.0
            Name.starts_with("vpshrd."));       // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||          // Added in 3.8
            Name == "vpcmov.256" ||      // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot"));  // Added in 8.0

  // No family prefix matched: the remaining upgradable names live directly
  // under "x86.".
  return (Name == "addcarry.u32" ||        // Added in 8.0
          Name == "addcarry.u64" ||        // Added in 8.0
          Name == "addcarryx.u32" ||       // Added in 8.0
          Name == "addcarryx.u64" ||       // Added in 8.0
          Name == "subborrow.u32" ||       // Added in 8.0
          Name == "subborrow.u64" ||       // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
495 
upgradeX86IntrinsicFunction(Function * F,StringRef Name,Function * & NewFn)496 static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
497                                         Function *&NewFn) {
498   // Only handle intrinsics that start with "x86.".
499   if (!Name.consume_front("x86."))
500     return false;
501 
502   if (shouldUpgradeX86Intrinsic(F, Name)) {
503     NewFn = nullptr;
504     return true;
505   }
506 
507   if (Name == "rdtscp") { // Added in 8.0
508     // If this intrinsic has 0 operands, it's the new version.
509     if (F->getFunctionType()->getNumParams() == 0)
510       return false;
511 
512     rename(F);
513     NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
514                                               Intrinsic::x86_rdtscp);
515     return true;
516   }
517 
518   Intrinsic::ID ID;
519 
520   // SSE4.1 ptest functions may have an old signature.
521   if (Name.consume_front("sse41.ptest")) { // Added in 3.2
522     ID = StringSwitch<Intrinsic::ID>(Name)
523              .Case("c", Intrinsic::x86_sse41_ptestc)
524              .Case("z", Intrinsic::x86_sse41_ptestz)
525              .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
526              .Default(Intrinsic::not_intrinsic);
527     if (ID != Intrinsic::not_intrinsic)
528       return upgradePTESTIntrinsic(F, ID, NewFn);
529 
530     return false;
531   }
532 
533   // Several blend and other instructions with masks used the wrong number of
534   // bits.
535 
536   // Added in 3.6
537   ID = StringSwitch<Intrinsic::ID>(Name)
538            .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
539            .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
540            .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
541            .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
542            .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
543            .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
544            .Default(Intrinsic::not_intrinsic);
545   if (ID != Intrinsic::not_intrinsic)
546     return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
547 
548   if (Name.consume_front("avx512.mask.cmp.")) {
549     // Added in 7.0
550     ID = StringSwitch<Intrinsic::ID>(Name)
551              .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
552              .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
553              .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
554              .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
555              .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
556              .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
557              .Default(Intrinsic::not_intrinsic);
558     if (ID != Intrinsic::not_intrinsic)
559       return upgradeX86MaskedFPCompare(F, ID, NewFn);
560     return false; // No other 'x86.avx523.mask.cmp.*'.
561   }
562 
563   if (Name.consume_front("avx512bf16.")) {
564     // Added in 9.0
565     ID = StringSwitch<Intrinsic::ID>(Name)
566              .Case("cvtne2ps2bf16.128",
567                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
568              .Case("cvtne2ps2bf16.256",
569                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
570              .Case("cvtne2ps2bf16.512",
571                    Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
572              .Case("mask.cvtneps2bf16.128",
573                    Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
574              .Case("cvtneps2bf16.256",
575                    Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
576              .Case("cvtneps2bf16.512",
577                    Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
578              .Default(Intrinsic::not_intrinsic);
579     if (ID != Intrinsic::not_intrinsic)
580       return upgradeX86BF16Intrinsic(F, ID, NewFn);
581 
582     // Added in 9.0
583     ID = StringSwitch<Intrinsic::ID>(Name)
584              .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
585              .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
586              .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
587              .Default(Intrinsic::not_intrinsic);
588     if (ID != Intrinsic::not_intrinsic)
589       return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
590     return false; // No other 'x86.avx512bf16.*'.
591   }
592 
593   if (Name.consume_front("xop.")) {
594     Intrinsic::ID ID = Intrinsic::not_intrinsic;
595     if (Name.starts_with("vpermil2")) { // Added in 3.9
596       // Upgrade any XOP PERMIL2 index operand still using a float/double
597       // vector.
598       auto Idx = F->getFunctionType()->getParamType(2);
599       if (Idx->isFPOrFPVectorTy()) {
600         unsigned IdxSize = Idx->getPrimitiveSizeInBits();
601         unsigned EltSize = Idx->getScalarSizeInBits();
602         if (EltSize == 64 && IdxSize == 128)
603           ID = Intrinsic::x86_xop_vpermil2pd;
604         else if (EltSize == 32 && IdxSize == 128)
605           ID = Intrinsic::x86_xop_vpermil2ps;
606         else if (EltSize == 64 && IdxSize == 256)
607           ID = Intrinsic::x86_xop_vpermil2pd_256;
608         else
609           ID = Intrinsic::x86_xop_vpermil2ps_256;
610       }
611     } else if (F->arg_size() == 2)
612       // frcz.ss/sd may need to have an argument dropped. Added in 3.2
613       ID = StringSwitch<Intrinsic::ID>(Name)
614                .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
615                .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
616                .Default(Intrinsic::not_intrinsic);
617 
618     if (ID != Intrinsic::not_intrinsic) {
619       rename(F);
620       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
621       return true;
622     }
623     return false; // No other 'x86.xop.*'
624   }
625 
626   if (Name == "seh.recoverfp") {
627     NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
628                                               Intrinsic::eh_recoverfp);
629     return true;
630   }
631 
632   return false;
633 }
634 
// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
//
// \p Name arrives with the leading "llvm.arm." / "llvm.aarch64." already
// stripped by the caller. On a successful match, \p NewFn is either set to
// the replacement declaration, or deliberately left alone for upgrades that
// are finished later when call sites are rewritten (e.g. the renamed
// 'arm.mve.vctp64', and the name-only matches that just return true).
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  // Strip an optional "neon." component; the bfdot/bfm* upgrades below are
  // shared between ARM and AArch64, the rest is handled per-target.
  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // (arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        // The new overload's second type is a bf16 vector with the same total
        // bit width as the result (one bf16 element per 16 bits).
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // (arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          // Groups[1] is the "[1234]" or "[234]lane" capture; the intrinsic
          // table is indexed by the old declaration's parameter count.
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      // For all of the cases below, returning true without setting NewFn
      // flags the declaration for fixup at the call sites.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        // Only the floating-point variant is upgraded (to faddp); an
        // integer addp falls through and is not upgraded.
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          // Name[0] is '2', '3' or '4' (guaranteed by LdRegex), selecting
          // ld2/ld3/ld4 respectively.
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          // Note the reordering: the old tuple.set parameter order is not
          // the vector_insert overload order.
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
953 
shouldUpgradeNVPTXTMAG2SIntrinsics(Function * F,StringRef Name)954 static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
955                                                         StringRef Name) {
956   if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
957     Intrinsic::ID ID =
958         StringSwitch<Intrinsic::ID>(Name)
959             .Case("im2col.3d",
960                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
961             .Case("im2col.4d",
962                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
963             .Case("im2col.5d",
964                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
965             .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
966             .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
967             .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
968             .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
969             .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
970             .Default(Intrinsic::not_intrinsic);
971 
972     if (ID == Intrinsic::not_intrinsic)
973       return ID;
974 
975     // These intrinsics may need upgrade for two reasons:
976     // (1) When the address-space of the first argument is shared[AS=3]
977     //     (and we upgrade it to use shared_cluster address-space[AS=7])
978     if (F->getArg(0)->getType()->getPointerAddressSpace() ==
979         NVPTXAS::ADDRESS_SPACE_SHARED)
980       return ID;
981 
982     // (2) When there are only two boolean flag arguments at the end:
983     //
984     // The last three parameters of the older version of these
985     // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
986     //
987     // The newer version reads as:
988     // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
989     //
990     // So, when the type of the [N-3]rd argument is "not i1", then
991     // it is the older version and we need to upgrade.
992     size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
993     Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
994     if (!ArgType->isIntegerTy(1))
995       return ID;
996   }
997 
998   return Intrinsic::not_intrinsic;
999 }
1000 
shouldUpgradeNVPTXSharedClusterIntrinsic(Function * F,StringRef Name)1001 static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
1002                                                               StringRef Name) {
1003   if (Name.consume_front("mapa.shared.cluster"))
1004     if (F->getReturnType()->getPointerAddressSpace() ==
1005         NVPTXAS::ADDRESS_SPACE_SHARED)
1006       return Intrinsic::nvvm_mapa_shared_cluster;
1007 
1008   if (Name.consume_front("cp.async.bulk.")) {
1009     Intrinsic::ID ID =
1010         StringSwitch<Intrinsic::ID>(Name)
1011             .Case("global.to.shared.cluster",
1012                   Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1013             .Case("shared.cta.to.cluster",
1014                   Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1015             .Default(Intrinsic::not_intrinsic);
1016 
1017     if (ID != Intrinsic::not_intrinsic)
1018       if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1019           NVPTXAS::ADDRESS_SPACE_SHARED)
1020         return ID;
1021   }
1022 
1023   return Intrinsic::not_intrinsic;
1024 }
1025 
shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)1026 static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
1027   if (Name.consume_front("fma.rn."))
1028     return StringSwitch<Intrinsic::ID>(Name)
1029         .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1030         .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1031         .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
1032         .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
1033         .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
1034         .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
1035         .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
1036         .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
1037         .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1038         .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1039         .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
1040         .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
1041         .Default(Intrinsic::not_intrinsic);
1042 
1043   if (Name.consume_front("fmax."))
1044     return StringSwitch<Intrinsic::ID>(Name)
1045         .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1046         .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1047         .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1048         .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1049         .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1050         .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1051         .Case("ftz.nan.xorsign.abs.bf16",
1052               Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1053         .Case("ftz.nan.xorsign.abs.bf16x2",
1054               Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1055         .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1056         .Case("ftz.xorsign.abs.bf16x2",
1057               Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1058         .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1059         .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1060         .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1061         .Case("nan.xorsign.abs.bf16x2",
1062               Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1063         .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1064         .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1065         .Default(Intrinsic::not_intrinsic);
1066 
1067   if (Name.consume_front("fmin."))
1068     return StringSwitch<Intrinsic::ID>(Name)
1069         .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1070         .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1071         .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1072         .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1073         .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1074         .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1075         .Case("ftz.nan.xorsign.abs.bf16",
1076               Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1077         .Case("ftz.nan.xorsign.abs.bf16x2",
1078               Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1079         .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1080         .Case("ftz.xorsign.abs.bf16x2",
1081               Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1082         .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1083         .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1084         .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1085         .Case("nan.xorsign.abs.bf16x2",
1086               Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1087         .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1088         .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1089         .Default(Intrinsic::not_intrinsic);
1090 
1091   if (Name.consume_front("neg."))
1092     return StringSwitch<Intrinsic::ID>(Name)
1093         .Case("bf16", Intrinsic::nvvm_neg_bf16)
1094         .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1095         .Default(Intrinsic::not_intrinsic);
1096 
1097   return Intrinsic::not_intrinsic;
1098 }
1099 
consumeNVVMPtrAddrSpace(StringRef & Name)1100 static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
1101   return Name.consume_front("local") || Name.consume_front("shared") ||
1102          Name.consume_front("global") || Name.consume_front("constant") ||
1103          Name.consume_front("param");
1104 }
1105 
upgradeIntrinsicFunction1(Function * F,Function * & NewFn,bool CanUpgradeDebugIntrinsicsToRecords)1106 static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1107                                       bool CanUpgradeDebugIntrinsicsToRecords) {
1108   assert(F && "Illegal to upgrade a non-existent Function.");
1109 
1110   StringRef Name = F->getName();
1111 
1112   // Quickly eliminate it, if it's not a candidate.
1113   if (!Name.consume_front("llvm.") || Name.empty())
1114     return false;
1115 
1116   switch (Name[0]) {
1117   default: break;
1118   case 'a': {
1119     bool IsArm = Name.consume_front("arm.");
1120     if (IsArm || Name.consume_front("aarch64.")) {
1121       if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1122         return true;
1123       break;
1124     }
1125 
1126     if (Name.consume_front("amdgcn.")) {
1127       if (Name == "alignbit") {
1128         // Target specific intrinsic became redundant
1129         NewFn = Intrinsic::getOrInsertDeclaration(
1130             F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1131         return true;
1132       }
1133 
1134       if (Name.consume_front("atomic.")) {
1135         if (Name.starts_with("inc") || Name.starts_with("dec")) {
1136           // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1137           // there's no new declaration.
1138           NewFn = nullptr;
1139           return true;
1140         }
1141         break; // No other 'amdgcn.atomic.*'
1142       }
1143 
1144       if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1145           Name.consume_front("flat.atomic.")) {
1146         if (Name.starts_with("fadd") ||
1147             // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1148             (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1149             (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1150           // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1151           // declaration.
1152           NewFn = nullptr;
1153           return true;
1154         }
1155       }
1156 
1157       if (Name.starts_with("ldexp.")) {
1158         // Target specific intrinsic became redundant
1159         NewFn = Intrinsic::getOrInsertDeclaration(
1160             F->getParent(), Intrinsic::ldexp,
1161             {F->getReturnType(), F->getArg(1)->getType()});
1162         return true;
1163       }
1164       break; // No other 'amdgcn.*'
1165     }
1166 
1167     break;
1168   }
1169   case 'c': {
1170     if (F->arg_size() == 1) {
1171       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1172                              .StartsWith("ctlz.", Intrinsic::ctlz)
1173                              .StartsWith("cttz.", Intrinsic::cttz)
1174                              .Default(Intrinsic::not_intrinsic);
1175       if (ID != Intrinsic::not_intrinsic) {
1176         rename(F);
1177         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1178                                                   F->arg_begin()->getType());
1179         return true;
1180       }
1181     }
1182 
1183     if (F->arg_size() == 2 && Name == "coro.end") {
1184       rename(F);
1185       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1186                                                 Intrinsic::coro_end);
1187       return true;
1188     }
1189 
1190     break;
1191   }
1192   case 'd':
1193     if (Name.consume_front("dbg.")) {
1194       // Mark debug intrinsics for upgrade to new debug format.
1195       if (CanUpgradeDebugIntrinsicsToRecords) {
1196         if (Name == "addr" || Name == "value" || Name == "assign" ||
1197             Name == "declare" || Name == "label") {
1198           // There's no function to replace these with.
1199           NewFn = nullptr;
1200           // But we do want these to get upgraded.
1201           return true;
1202         }
1203       }
1204       // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1205       // converted to DbgVariableRecords later.
1206       if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1207         rename(F);
1208         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1209                                                   Intrinsic::dbg_value);
1210         return true;
1211       }
1212       break; // No other 'dbg.*'.
1213     }
1214     break;
1215   case 'e':
1216     if (Name.consume_front("experimental.vector.")) {
1217       Intrinsic::ID ID =
1218           StringSwitch<Intrinsic::ID>(Name)
1219               // Skip over extract.last.active, otherwise it will be 'upgraded'
1220               // to a regular vector extract which is a different operation.
1221               .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1222               .StartsWith("extract.", Intrinsic::vector_extract)
1223               .StartsWith("insert.", Intrinsic::vector_insert)
1224               .StartsWith("splice.", Intrinsic::vector_splice)
1225               .StartsWith("reverse.", Intrinsic::vector_reverse)
1226               .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1227               .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1228               .Default(Intrinsic::not_intrinsic);
1229       if (ID != Intrinsic::not_intrinsic) {
1230         const auto *FT = F->getFunctionType();
1231         SmallVector<Type *, 2> Tys;
1232         if (ID == Intrinsic::vector_extract ||
1233             ID == Intrinsic::vector_interleave2)
1234           // Extracting overloads the return type.
1235           Tys.push_back(FT->getReturnType());
1236         if (ID != Intrinsic::vector_interleave2)
1237           Tys.push_back(FT->getParamType(0));
1238         if (ID == Intrinsic::vector_insert)
1239           // Inserting overloads the inserted type.
1240           Tys.push_back(FT->getParamType(1));
1241         rename(F);
1242         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1243         return true;
1244       }
1245 
1246       if (Name.consume_front("reduce.")) {
1247         SmallVector<StringRef, 2> Groups;
1248         static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1249         if (R.match(Name, &Groups))
1250           ID = StringSwitch<Intrinsic::ID>(Groups[1])
1251                    .Case("add", Intrinsic::vector_reduce_add)
1252                    .Case("mul", Intrinsic::vector_reduce_mul)
1253                    .Case("and", Intrinsic::vector_reduce_and)
1254                    .Case("or", Intrinsic::vector_reduce_or)
1255                    .Case("xor", Intrinsic::vector_reduce_xor)
1256                    .Case("smax", Intrinsic::vector_reduce_smax)
1257                    .Case("smin", Intrinsic::vector_reduce_smin)
1258                    .Case("umax", Intrinsic::vector_reduce_umax)
1259                    .Case("umin", Intrinsic::vector_reduce_umin)
1260                    .Case("fmax", Intrinsic::vector_reduce_fmax)
1261                    .Case("fmin", Intrinsic::vector_reduce_fmin)
1262                    .Default(Intrinsic::not_intrinsic);
1263 
1264         bool V2 = false;
1265         if (ID == Intrinsic::not_intrinsic) {
1266           static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1267           Groups.clear();
1268           V2 = true;
1269           if (R2.match(Name, &Groups))
1270             ID = StringSwitch<Intrinsic::ID>(Groups[1])
1271                      .Case("fadd", Intrinsic::vector_reduce_fadd)
1272                      .Case("fmul", Intrinsic::vector_reduce_fmul)
1273                      .Default(Intrinsic::not_intrinsic);
1274         }
1275         if (ID != Intrinsic::not_intrinsic) {
1276           rename(F);
1277           auto Args = F->getFunctionType()->params();
1278           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1279                                                     {Args[V2 ? 1 : 0]});
1280           return true;
1281         }
1282         break; // No other 'expermental.vector.reduce.*'.
1283       }
1284       break; // No other 'experimental.vector.*'.
1285     }
1286     if (Name.consume_front("experimental.stepvector.")) {
1287       Intrinsic::ID ID = Intrinsic::stepvector;
1288       rename(F);
1289       NewFn = Intrinsic::getOrInsertDeclaration(
1290           F->getParent(), ID, F->getFunctionType()->getReturnType());
1291       return true;
1292     }
1293     break; // No other 'e*'.
1294   case 'f':
1295     if (Name.starts_with("flt.rounds")) {
1296       rename(F);
1297       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1298                                                 Intrinsic::get_rounding);
1299       return true;
1300     }
1301     break;
1302   case 'i':
1303     if (Name.starts_with("invariant.group.barrier")) {
1304       // Rename invariant.group.barrier to launder.invariant.group
1305       auto Args = F->getFunctionType()->params();
1306       Type* ObjectPtr[1] = {Args[0]};
1307       rename(F);
1308       NewFn = Intrinsic::getOrInsertDeclaration(
1309           F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1310       return true;
1311     }
1312     break;
1313   case 'm': {
1314     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1315     // alignment parameter to embedding the alignment as an attribute of
1316     // the pointer args.
1317     if (unsigned ID = StringSwitch<unsigned>(Name)
1318                           .StartsWith("memcpy.", Intrinsic::memcpy)
1319                           .StartsWith("memmove.", Intrinsic::memmove)
1320                           .Default(0)) {
1321       if (F->arg_size() == 5) {
1322         rename(F);
1323         // Get the types of dest, src, and len
1324         ArrayRef<Type *> ParamTypes =
1325             F->getFunctionType()->params().slice(0, 3);
1326         NewFn =
1327             Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1328         return true;
1329       }
1330     }
1331     if (Name.starts_with("memset.") && F->arg_size() == 5) {
1332       rename(F);
1333       // Get the types of dest, and len
1334       const auto *FT = F->getFunctionType();
1335       Type *ParamTypes[2] = {
1336           FT->getParamType(0), // Dest
1337           FT->getParamType(2)  // len
1338       };
1339       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1340                                                 Intrinsic::memset, ParamTypes);
1341       return true;
1342     }
1343     break;
1344   }
1345   case 'n': {
1346     if (Name.consume_front("nvvm.")) {
1347       // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1348       if (F->arg_size() == 1) {
1349         Intrinsic::ID IID =
1350             StringSwitch<Intrinsic::ID>(Name)
1351                 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1352                 .Case("clz.i", Intrinsic::ctlz)
1353                 .Case("popc.i", Intrinsic::ctpop)
1354                 .Default(Intrinsic::not_intrinsic);
1355         if (IID != Intrinsic::not_intrinsic) {
1356           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1357                                                     {F->getReturnType()});
1358           return true;
1359         }
1360       }
1361 
1362       // Check for nvvm intrinsics that need a return type adjustment.
1363       if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1364         Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1365         if (IID != Intrinsic::not_intrinsic) {
1366           NewFn = nullptr;
1367           return true;
1368         }
1369       }
1370 
1371       // Upgrade Distributed Shared Memory Intrinsics
1372       Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
1373       if (IID != Intrinsic::not_intrinsic) {
1374         rename(F);
1375         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1376         return true;
1377       }
1378 
1379       // Upgrade TMA copy G2S Intrinsics
1380       IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
1381       if (IID != Intrinsic::not_intrinsic) {
1382         rename(F);
1383         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1384         return true;
1385       }
1386 
1387       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1388       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
1389       //
1390       // TODO: We could add lohi.i2d.
1391       bool Expand = false;
1392       if (Name.consume_front("abs."))
1393         // nvvm.abs.{i,ii}
1394         Expand =
1395             Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1396       else if (Name.consume_front("fabs."))
1397         // nvvm.fabs.{f,ftz.f,d}
1398         Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1399       else if (Name.consume_front("max.") || Name.consume_front("min."))
1400         // nvvm.{min,max}.{i,ii,ui,ull}
1401         Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1402                  Name == "ui" || Name == "ull";
1403       else if (Name.consume_front("atomic.load."))
1404         // nvvm.atomic.load.add.{f32,f64}.p
1405         // nvvm.atomic.load.{inc,dec}.32.p
1406         Expand = StringSwitch<bool>(Name)
1407                      .StartsWith("add.f32.p", true)
1408                      .StartsWith("add.f64.p", true)
1409                      .StartsWith("inc.32.p", true)
1410                      .StartsWith("dec.32.p", true)
1411                      .Default(false);
1412       else if (Name.consume_front("bitcast."))
1413         // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1414         Expand =
1415             Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1416       else if (Name.consume_front("rotate."))
1417         // nvvm.rotate.{b32,b64,right.b64}
1418         Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1419       else if (Name.consume_front("ptr.gen.to."))
1420         // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1421         Expand = consumeNVVMPtrAddrSpace(Name);
1422       else if (Name.consume_front("ptr."))
1423         // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1424         Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1425       else if (Name.consume_front("ldg.global."))
1426         // nvvm.ldg.global.{i,p,f}
1427         Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1428                   Name.starts_with("p."));
1429       else
1430         Expand = StringSwitch<bool>(Name)
1431                      .Case("barrier0", true)
1432                      .Case("barrier.n", true)
1433                      .Case("barrier.sync.cnt", true)
1434                      .Case("barrier.sync", true)
1435                      .Case("barrier", true)
1436                      .Case("bar.sync", true)
1437                      .Case("clz.ll", true)
1438                      .Case("popc.ll", true)
1439                      .Case("h2f", true)
1440                      .Case("swap.lo.hi.b64", true)
1441                      .Default(false);
1442 
1443       if (Expand) {
1444         NewFn = nullptr;
1445         return true;
1446       }
1447       break; // No other 'nvvm.*'.
1448     }
1449     break;
1450   }
1451   case 'o':
1452     if (Name.starts_with("objectsize.")) {
1453       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1454       if (F->arg_size() == 2 || F->arg_size() == 3) {
1455         rename(F);
1456         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1457                                                   Intrinsic::objectsize, Tys);
1458         return true;
1459       }
1460     }
1461     break;
1462 
1463   case 'p':
1464     if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1465       rename(F);
1466       NewFn = Intrinsic::getOrInsertDeclaration(
1467           F->getParent(), Intrinsic::ptr_annotation,
1468           {F->arg_begin()->getType(), F->getArg(1)->getType()});
1469       return true;
1470     }
1471     break;
1472 
1473   case 'r': {
1474     if (Name.consume_front("riscv.")) {
1475       Intrinsic::ID ID;
1476       ID = StringSwitch<Intrinsic::ID>(Name)
1477                .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1478                .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1479                .Case("aes32esi", Intrinsic::riscv_aes32esi)
1480                .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1481                .Default(Intrinsic::not_intrinsic);
1482       if (ID != Intrinsic::not_intrinsic) {
1483         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1484           rename(F);
1485           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1486           return true;
1487         }
1488         break; // No other applicable upgrades.
1489       }
1490 
1491       ID = StringSwitch<Intrinsic::ID>(Name)
1492                .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1493                .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1494                .Default(Intrinsic::not_intrinsic);
1495       if (ID != Intrinsic::not_intrinsic) {
1496         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1497             F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1498           rename(F);
1499           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1500           return true;
1501         }
1502         break; // No other applicable upgrades.
1503       }
1504 
1505       ID = StringSwitch<Intrinsic::ID>(Name)
1506                .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1507                .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1508                .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1509                .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1510                .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1511                .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1512                .Default(Intrinsic::not_intrinsic);
1513       if (ID != Intrinsic::not_intrinsic) {
1514         if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1515           rename(F);
1516           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1517           return true;
1518         }
1519         break; // No other applicable upgrades.
1520       }
1521       break; // No other 'riscv.*' intrinsics
1522     }
1523   } break;
1524 
1525   case 's':
1526     if (Name == "stackprotectorcheck") {
1527       NewFn = nullptr;
1528       return true;
1529     }
1530     break;
1531 
1532   case 't':
1533     if (Name == "thread.pointer") {
1534       NewFn = Intrinsic::getOrInsertDeclaration(
1535           F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1536       return true;
1537     }
1538     break;
1539 
1540   case 'v': {
1541     if (Name == "var.annotation" && F->arg_size() == 4) {
1542       rename(F);
1543       NewFn = Intrinsic::getOrInsertDeclaration(
1544           F->getParent(), Intrinsic::var_annotation,
1545           {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1546       return true;
1547     }
1548     break;
1549   }
1550 
1551   case 'w':
1552     if (Name.consume_front("wasm.")) {
1553       Intrinsic::ID ID =
1554           StringSwitch<Intrinsic::ID>(Name)
1555               .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1556               .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1557               .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1558               .Default(Intrinsic::not_intrinsic);
1559       if (ID != Intrinsic::not_intrinsic) {
1560         rename(F);
1561         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1562                                                   F->getReturnType());
1563         return true;
1564       }
1565 
1566       if (Name.consume_front("dot.i8x16.i7x16.")) {
1567         ID = StringSwitch<Intrinsic::ID>(Name)
1568                  .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1569                  .Case("add.signed",
1570                        Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1571                  .Default(Intrinsic::not_intrinsic);
1572         if (ID != Intrinsic::not_intrinsic) {
1573           rename(F);
1574           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1575           return true;
1576         }
1577         break; // No other 'wasm.dot.i8x16.i7x16.*'.
1578       }
1579       break; // No other 'wasm.*'.
1580     }
1581     break;
1582 
1583   case 'x':
1584     if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1585       return true;
1586   }
1587 
1588   auto *ST = dyn_cast<StructType>(F->getReturnType());
1589   if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1590       F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1591     // Replace return type with literal non-packed struct. Only do this for
1592     // intrinsics declared to return a struct, not for intrinsics with
1593     // overloaded return type, in which case the exact struct type will be
1594     // mangled into the name.
1595     SmallVector<Intrinsic::IITDescriptor> Desc;
1596     Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1597     if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1598       auto *FT = F->getFunctionType();
1599       auto *NewST = StructType::get(ST->getContext(), ST->elements());
1600       auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1601       std::string Name = F->getName().str();
1602       rename(F);
1603       NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1604                                Name, F->getParent());
1605 
1606       // The new function may also need remangling.
1607       if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1608         NewFn = *Result;
1609       return true;
1610     }
1611   }
1612 
1613   // Remangle our intrinsic since we upgrade the mangling
1614   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1615   if (Result != std::nullopt) {
1616     NewFn = *Result;
1617     return true;
1618   }
1619 
1620   //  This may not belong here. This function is effectively being overloaded
1621   //  to both detect an intrinsic which needs upgrading, and to provide the
1622   //  upgraded form of the intrinsic. We should perhaps have two separate
1623   //  functions for this.
1624   return false;
1625 }
1626 
UpgradeIntrinsicFunction(Function * F,Function * & NewFn,bool CanUpgradeDebugIntrinsicsToRecords)1627 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1628                                     bool CanUpgradeDebugIntrinsicsToRecords) {
1629   NewFn = nullptr;
1630   bool Upgraded =
1631       upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1632   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1633 
1634   // Upgrade intrinsic attributes.  This does not change the function.
1635   if (NewFn)
1636     F = NewFn;
1637   if (Intrinsic::ID id = F->getIntrinsicID()) {
1638     // Only do this if the intrinsic signature is valid.
1639     SmallVector<Type *> OverloadTys;
1640     if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1641       F->setAttributes(
1642           Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1643   }
1644   return Upgraded;
1645 }
1646 
UpgradeGlobalVariable(GlobalVariable * GV)1647 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1648   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1649                           GV->getName() == "llvm.global_dtors")) ||
1650       !GV->hasInitializer())
1651     return nullptr;
1652   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1653   if (!ATy)
1654     return nullptr;
1655   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1656   if (!STy || STy->getNumElements() != 2)
1657     return nullptr;
1658 
1659   LLVMContext &C = GV->getContext();
1660   IRBuilder<> IRB(C);
1661   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1662                                IRB.getPtrTy());
1663   Constant *Init = GV->getInitializer();
1664   unsigned N = Init->getNumOperands();
1665   std::vector<Constant *> NewCtors(N);
1666   for (unsigned i = 0; i != N; ++i) {
1667     auto Ctor = cast<Constant>(Init->getOperand(i));
1668     NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1669                                       Ctor->getAggregateElement(1),
1670                                       ConstantPointerNull::get(IRB.getPtrTy()));
1671   }
1672   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1673 
1674   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1675                             NewInit, GV->getName());
1676 }
1677 
1678 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1679 // to byte shuffles.
upgradeX86PSLLDQIntrinsics(IRBuilder<> & Builder,Value * Op,unsigned Shift)1680 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1681                                          unsigned Shift) {
1682   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1683   unsigned NumElts = ResultTy->getNumElements() * 8;
1684 
1685   // Bitcast from a 64-bit element type to a byte element type.
1686   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1687   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1688 
1689   // We'll be shuffling in zeroes.
1690   Value *Res = Constant::getNullValue(VecTy);
1691 
1692   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1693   // we'll just return the zero vector.
1694   if (Shift < 16) {
1695     int Idxs[64];
1696     // 256/512-bit version is split into 2/4 16-byte lanes.
1697     for (unsigned l = 0; l != NumElts; l += 16)
1698       for (unsigned i = 0; i != 16; ++i) {
1699         unsigned Idx = NumElts + i - Shift;
1700         if (Idx < NumElts)
1701           Idx -= NumElts - 16; // end of lane, switch operand.
1702         Idxs[l + i] = Idx + l;
1703       }
1704 
1705     Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1706   }
1707 
1708   // Bitcast back to a 64-bit element type.
1709   return Builder.CreateBitCast(Res, ResultTy, "cast");
1710 }
1711 
1712 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1713 // to byte shuffles.
upgradeX86PSRLDQIntrinsics(IRBuilder<> & Builder,Value * Op,unsigned Shift)1714 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1715                                          unsigned Shift) {
1716   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1717   unsigned NumElts = ResultTy->getNumElements() * 8;
1718 
1719   // Bitcast from a 64-bit element type to a byte element type.
1720   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1721   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1722 
1723   // We'll be shuffling in zeroes.
1724   Value *Res = Constant::getNullValue(VecTy);
1725 
1726   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1727   // we'll just return the zero vector.
1728   if (Shift < 16) {
1729     int Idxs[64];
1730     // 256/512-bit version is split into 2/4 16-byte lanes.
1731     for (unsigned l = 0; l != NumElts; l += 16)
1732       for (unsigned i = 0; i != 16; ++i) {
1733         unsigned Idx = i + Shift;
1734         if (Idx >= 16)
1735           Idx += NumElts - 16; // end of lane, switch operand.
1736         Idxs[l + i] = Idx + l;
1737       }
1738 
1739     Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1740   }
1741 
1742   // Bitcast back to a 64-bit element type.
1743   return Builder.CreateBitCast(Res, ResultTy, "cast");
1744 }
1745 
getX86MaskVec(IRBuilder<> & Builder,Value * Mask,unsigned NumElts)1746 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1747                             unsigned NumElts) {
1748   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1749   llvm::VectorType *MaskTy = FixedVectorType::get(
1750       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1751   Mask = Builder.CreateBitCast(Mask, MaskTy);
1752 
1753   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1754   // i8 and we need to extract down to the right number of elements.
1755   if (NumElts <= 4) {
1756     int Indices[4];
1757     for (unsigned i = 0; i != NumElts; ++i)
1758       Indices[i] = i;
1759     Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1760                                        "extract");
1761   }
1762 
1763   return Mask;
1764 }
1765 
emitX86Select(IRBuilder<> & Builder,Value * Mask,Value * Op0,Value * Op1)1766 static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1767                             Value *Op1) {
1768   // If the mask is all ones just emit the first operation.
1769   if (const auto *C = dyn_cast<Constant>(Mask))
1770     if (C->isAllOnesValue())
1771       return Op0;
1772 
1773   Mask = getX86MaskVec(Builder, Mask,
1774                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1775   return Builder.CreateSelect(Mask, Op0, Op1);
1776 }
1777 
emitX86ScalarSelect(IRBuilder<> & Builder,Value * Mask,Value * Op0,Value * Op1)1778 static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1779                                   Value *Op1) {
1780   // If the mask is all ones just emit the first operation.
1781   if (const auto *C = dyn_cast<Constant>(Mask))
1782     if (C->isAllOnesValue())
1783       return Op0;
1784 
1785   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1786                                       Mask->getType()->getIntegerBitWidth());
1787   Mask = Builder.CreateBitCast(Mask, MaskTy);
1788   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1789   return Builder.CreateSelect(Mask, Op0, Op1);
1790 }
1791 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount is an immediate operand of these intrinsics, so it is
  // always a ConstantInt.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  // Operand order matters: the shuffle reads the concatenation (Op1, Op0),
  // matching PALIGNR's [src2:src1] byte-concatenation semantics.
  Value *Align = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

  // Apply the writemask: lanes with a clear mask bit take Passthru instead.
  return emitX86Select(Builder, Mask, Align, Passthru);
}
1839 
// Upgrade legacy masked vpermt2var/vpermi2var intrinsics to a call to the
// current unmasked x86_avx512_vpermi2var_* intrinsic followed by an explicit
// select against the passthru/zero vector.
//
// \param ZeroMask  true for the maskz (zeroing) variants, where masked-off
//                  lanes become zero instead of coming from a passthru vector.
// \param IndexForm true for the vpermi2 form, where the index vector is
//                  operand 0; for the vpermt2 form operands 0 and 1 are
//                  swapped below.
static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  // Select the replacement intrinsic from vector width, element width and
  // float-ness; 8/16-bit elements have no FP variant.
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateIntrinsic(IID, Args);
  // Masked-off lanes come from zero (maskz) or from operand 1 bitcast to the
  // result type (the legacy passthru position).
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1899 
upgradeX86BinaryIntrinsics(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)1900 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1901                                          Intrinsic::ID IID) {
1902   Type *Ty = CI.getType();
1903   Value *Op0 = CI.getOperand(0);
1904   Value *Op1 = CI.getOperand(1);
1905   Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1906 
1907   if (CI.arg_size() == 4) { // For masked intrinsics.
1908     Value *VecSrc = CI.getOperand(2);
1909     Value *Mask = CI.getOperand(3);
1910     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1911   }
1912   return Res;
1913 }
1914 
upgradeX86Rotate(IRBuilder<> & Builder,CallBase & CI,bool IsRotateRight)1915 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1916                                bool IsRotateRight) {
1917   Type *Ty = CI.getType();
1918   Value *Src = CI.getArgOperand(0);
1919   Value *Amt = CI.getArgOperand(1);
1920 
1921   // Amount may be scalar immediate, in which case create a splat vector.
1922   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1923   // we only care about the lowest log2 bits anyway.
1924   if (Amt->getType() != Ty) {
1925     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1926     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1927     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1928   }
1929 
1930   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1931   Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1932 
1933   if (CI.arg_size() == 4) { // For masked intrinsics.
1934     Value *VecSrc = CI.getOperand(2);
1935     Value *Mask = CI.getOperand(3);
1936     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1937   }
1938   return Res;
1939 }
1940 
upgradeX86vpcom(IRBuilder<> & Builder,CallBase & CI,unsigned Imm,bool IsSigned)1941 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1942                               bool IsSigned) {
1943   Type *Ty = CI.getType();
1944   Value *LHS = CI.getArgOperand(0);
1945   Value *RHS = CI.getArgOperand(1);
1946 
1947   CmpInst::Predicate Pred;
1948   switch (Imm) {
1949   case 0x0:
1950     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1951     break;
1952   case 0x1:
1953     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1954     break;
1955   case 0x2:
1956     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1957     break;
1958   case 0x3:
1959     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1960     break;
1961   case 0x4:
1962     Pred = ICmpInst::ICMP_EQ;
1963     break;
1964   case 0x5:
1965     Pred = ICmpInst::ICMP_NE;
1966     break;
1967   case 0x6:
1968     return Constant::getNullValue(Ty); // FALSE
1969   case 0x7:
1970     return Constant::getAllOnesValue(Ty); // TRUE
1971   default:
1972     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1973   }
1974 
1975   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1976   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1977   return Ext;
1978 }
1979 
upgradeX86ConcatShift(IRBuilder<> & Builder,CallBase & CI,bool IsShiftRight,bool ZeroMask)1980 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1981                                     bool IsShiftRight, bool ZeroMask) {
1982   Type *Ty = CI.getType();
1983   Value *Op0 = CI.getArgOperand(0);
1984   Value *Op1 = CI.getArgOperand(1);
1985   Value *Amt = CI.getArgOperand(2);
1986 
1987   if (IsShiftRight)
1988     std::swap(Op0, Op1);
1989 
1990   // Amount may be scalar immediate, in which case create a splat vector.
1991   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1992   // we only care about the lowest log2 bits anyway.
1993   if (Amt->getType() != Ty) {
1994     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1995     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1996     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1997   }
1998 
1999   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2000   Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2001 
2002   unsigned NumArgs = CI.arg_size();
2003   if (NumArgs >= 4) { // For masked intrinsics.
2004     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2005                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
2006                                    CI.getArgOperand(0);
2007     Value *Mask = CI.getOperand(NumArgs - 1);
2008     Res = emitX86Select(Builder, Mask, Res, VecSrc);
2009   }
2010   return Res;
2011 }
2012 
// Lower a legacy x86 masked-store intrinsic: emit a plain store when the
// mask is statically all-ones, otherwise a generic llvm.masked.store.
//
// \p Ptr      Destination pointer.
// \p Data     Vector value being stored.
// \p Mask     Integer bit-mask, one bit per vector element.
// \p Aligned  If true, use the vector's full natural alignment (size in
//             bytes); otherwise emit an unaligned (align 1) store.
static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
                                 Value *Mask, bool Aligned) {
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
2030 
// Lower a legacy x86 masked-load intrinsic: emit a plain load when the
// mask is statically all-ones, otherwise a generic llvm.masked.load that
// takes masked-off elements from \p Passthru.
//
// \p Ptr       Source pointer.
// \p Passthru  Vector supplying both the result type and the values for
//              masked-off lanes.
// \p Mask      Integer bit-mask, one bit per vector element.
// \p Aligned   If true, use the vector's full natural alignment (size in
//              bytes); otherwise emit an unaligned (align 1) load.
static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
                                Value *Passthru, Value *Mask, bool Aligned) {
  Type *ValTy = Passthru->getType();
  const Align Alignment =
      Aligned
          ? Align(
                Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
                8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
2051 
upgradeAbs(IRBuilder<> & Builder,CallBase & CI)2052 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2053   Type *Ty = CI.getType();
2054   Value *Op0 = CI.getArgOperand(0);
2055   Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2056                                        {Op0, Builder.getInt1(false)});
2057   if (CI.arg_size() == 3)
2058     Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2059   return Res;
2060 }
2061 
upgradePMULDQ(IRBuilder<> & Builder,CallBase & CI,bool IsSigned)2062 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2063   Type *Ty = CI.getType();
2064 
2065   // Arguments have a vXi32 type so cast to vXi64.
2066   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2067   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2068 
2069   if (IsSigned) {
2070     // Shift left then arithmetic shift right.
2071     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2072     LHS = Builder.CreateShl(LHS, ShiftAmt);
2073     LHS = Builder.CreateAShr(LHS, ShiftAmt);
2074     RHS = Builder.CreateShl(RHS, ShiftAmt);
2075     RHS = Builder.CreateAShr(RHS, ShiftAmt);
2076   } else {
2077     // Clear the upper bits.
2078     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2079     LHS = Builder.CreateAnd(LHS, Mask);
2080     RHS = Builder.CreateAnd(RHS, Mask);
2081   }
2082 
2083   Value *Res = Builder.CreateMul(LHS, RHS);
2084 
2085   if (CI.arg_size() == 4)
2086     Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2087 
2088   return Res;
2089 }
2090 
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Returns the vector reinterpreted as an iN integer, where N is the element
// count rounded up to at least 8 (matching the narrowest x86 mask register
// width the callers bitcast to).
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  if (Mask) {
    // Skip the AND entirely when the mask is a statically all-ones
    // constant -- it would be a no-op.
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Pad the vector out to 8 lanes with a shuffle: the first NumElts
    // indices keep the original lanes, the remaining ones (>= NumElts)
    // select from the all-zero second operand, zero-filling the high bits.
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  // Reinterpret the (possibly padded) i1 vector as a single integer mask.
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)))
;
}
2113 
upgradeMaskedCompare(IRBuilder<> & Builder,CallBase & CI,unsigned CC,bool Signed)2114 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2115                                    unsigned CC, bool Signed) {
2116   Value *Op0 = CI.getArgOperand(0);
2117   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2118 
2119   Value *Cmp;
2120   if (CC == 3) {
2121     Cmp = Constant::getNullValue(
2122         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2123   } else if (CC == 7) {
2124     Cmp = Constant::getAllOnesValue(
2125         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2126   } else {
2127     ICmpInst::Predicate Pred;
2128     switch (CC) {
2129     default: llvm_unreachable("Unknown condition code");
2130     case 0: Pred = ICmpInst::ICMP_EQ;  break;
2131     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2132     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2133     case 4: Pred = ICmpInst::ICMP_NE;  break;
2134     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2135     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2136     }
2137     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2138   }
2139 
2140   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2141 
2142   return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2143 }
2144 
2145 // Replace a masked intrinsic with an older unmasked intrinsic.
upgradeX86MaskedShift(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)2146 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2147                                     Intrinsic::ID IID) {
2148   Value *Rep =
2149       Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2150   return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2151 }
2152 
upgradeMaskedMove(IRBuilder<> & Builder,CallBase & CI)2153 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2154   Value* A = CI.getArgOperand(0);
2155   Value* B = CI.getArgOperand(1);
2156   Value* Src = CI.getArgOperand(2);
2157   Value* Mask = CI.getArgOperand(3);
2158 
2159   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2160   Value* Cmp = Builder.CreateIsNotNull(AndNode);
2161   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2162   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2163   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2164   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2165 }
2166 
upgradeMaskToInt(IRBuilder<> & Builder,CallBase & CI)2167 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2168   Value* Op = CI.getArgOperand(0);
2169   Type* ReturnOp = CI.getType();
2170   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2171   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2172   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2173 }
2174 
// Replace intrinsic with unmasked version and a select.
//
// Maps an "avx512.mask.*" intrinsic name (plus the call's vector and
// element widths) to the equivalent pre-AVX-512 unmasked intrinsic,
// emits that intrinsic with the passthrough and mask operands dropped,
// then blends the result with the passthrough under the mask.
//
// Returns true (with \p Rep set to the replacement value) if the name was
// recognized; false if the caller must handle the intrinsic some other way.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    // permvar dispatches on float vs integer element type as well as width.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    // Name[9] is the element-type suffix: 'd' (i32) or 'q' (i64).
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    // Name[5] is the element-type suffix: 'b' (i8) or 'w' (i16).
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Drop the trailing passthrough and mask operands, call the unmasked
  // intrinsic, then re-apply the mask as an explicit select.
  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(IID, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
2412 
2413 /// Upgrade comment in call to inline asm that represents an objc retain release
2414 /// marker.
UpgradeInlineAsmString(std::string * AsmStr)2415 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2416   size_t Pos;
2417   if (AsmStr->find("mov\tfp") == 0 &&
2418       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2419       (Pos = AsmStr->find("# marker")) != std::string::npos) {
2420     AsmStr->replace(Pos, 1, ";");
2421   }
2422 }
2423 
// Upgrade a legacy NVVM intrinsic call (name already stripped of the
// "llvm.nvvm." prefix) to current IR: plain instructions, generic LLVM
// intrinsics, or renamed NVVM intrinsics. Returns the replacement value,
// or nullptr if the call does not need (or cannot get) an upgrade here.
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                                       Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    // Integer abs as compare+select: (x >= 0) ? x : -x.
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    // bf16 abs arrives as an integer-typed call; bitcast to bfloat,
    // apply nvvm.fabs, and bitcast back to the original type.
    Type *Ty = (Name == "abs.bf16")
                   ? Builder.getBFloatTy()
                   : FixedVectorType::get(Builder.getBFloatTy(), 2);
    Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    // Preserve flush-to-zero semantics via the dedicated ftz intrinsic.
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    // Float atomic add maps directly onto atomicrmw fadd.
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {
    // Wrapping inc/dec map onto atomicrmw uinc_wrap/udec_wrap.
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
                                                  : AtomicRMWInst::UDecWrap;
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    // Integer max as compare+select; "u" prefix selects unsigned compare.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    // Integer min as compare+select; "u" prefix selects unsigned compare.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    // i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    // Half-to-float conversion via the generic fp16 conversion intrinsic.
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  {Builder.getFloatTy()}, CI->getArgOperand(0),
                                  /*FMFSource=*/nullptr, "h2f");
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    // Rotate == funnel shift with both data operands equal.
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    // 64-bit rotate takes a 32-bit amount; zero-extend it for fshl.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    // Swapping the i64 halves is a rotate by 32.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              consumeNVVMPtrAddrSpace(Name)) ||
             (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
              Name.starts_with(".to.gen"))) {
    // Address-space conversions map onto addrspacecast.
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    // ldg becomes an invariant load from the global address space, with the
    // alignment taken from the call's constant second operand.
    Value *Ptr = CI->getArgOperand(0);
    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
    MDNode *MD = MDNode::get(Builder.getContext(), {});
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    return LD;
  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
    // barrier0 takes no operand (implicit id 0); the others pass theirs.
    Value *Arg =
        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
                                  {}, {Arg});
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
        {CI->getArgOperand(0), CI->getArgOperand(1)});
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
                                  {CI->getArgOperand(0)});
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
                                  {CI->getArgOperand(0), CI->getArgOperand(1)});
  } else {
    // Fall back to the bf16 rename path: if this is a recognized bf16
    // intrinsic whose declaration still uses integer types, re-declare it
    // with bfloat types and bitcast the arguments/result as needed.
    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
      SmallVector<Value *, 2> Args;
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(I)->getType();
        Args.push_back(
            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(Arg, NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
    }
  }

  return Rep;
}
2574 
upgradeX86IntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)2575 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2576                                       IRBuilder<> &Builder) {
2577   LLVMContext &C = F->getContext();
2578   Value *Rep = nullptr;
2579 
2580   if (Name.starts_with("sse4a.movnt.")) {
2581     SmallVector<Metadata *, 1> Elts;
2582     Elts.push_back(
2583         ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2584     MDNode *Node = MDNode::get(C, Elts);
2585 
2586     Value *Arg0 = CI->getArgOperand(0);
2587     Value *Arg1 = CI->getArgOperand(1);
2588 
2589     // Nontemporal (unaligned) store of the 0'th element of the float/double
2590     // vector.
2591     Value *Extract =
2592         Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2593 
2594     StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2595     SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2596   } else if (Name.starts_with("avx.movnt.") ||
2597              Name.starts_with("avx512.storent.")) {
2598     SmallVector<Metadata *, 1> Elts;
2599     Elts.push_back(
2600         ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2601     MDNode *Node = MDNode::get(C, Elts);
2602 
2603     Value *Arg0 = CI->getArgOperand(0);
2604     Value *Arg1 = CI->getArgOperand(1);
2605 
2606     StoreInst *SI = Builder.CreateAlignedStore(
2607         Arg1, Arg0,
2608         Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2609     SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2610   } else if (Name == "sse2.storel.dq") {
2611     Value *Arg0 = CI->getArgOperand(0);
2612     Value *Arg1 = CI->getArgOperand(1);
2613 
2614     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2615     Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2616     Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2617     Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2618   } else if (Name.starts_with("sse.storeu.") ||
2619              Name.starts_with("sse2.storeu.") ||
2620              Name.starts_with("avx.storeu.")) {
2621     Value *Arg0 = CI->getArgOperand(0);
2622     Value *Arg1 = CI->getArgOperand(1);
2623     Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2624   } else if (Name == "avx512.mask.store.ss") {
2625     Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2626     upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2627                        Mask, false);
2628   } else if (Name.starts_with("avx512.mask.store")) {
2629     // "avx512.mask.storeu." or "avx512.mask.store."
2630     bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2631     upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2632                        CI->getArgOperand(2), Aligned);
  } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
    // Both "sse2.pcmp" and "avx2.pcmp" are 9 characters long, so Name[9] is
    // the first letter of the suffix: 'e' for "eq", 'g' for "gt".
    bool CmpEq = Name[9] == 'e';
    // Emit an icmp (eq / signed-gt) producing an i1 vector, then sign-extend
    // back to the original element width (all-ones / all-zeros per lane).
    Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                             CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2640   } else if (Name.starts_with("avx512.broadcastm")) {
2641     Type *ExtTy = Type::getInt32Ty(C);
2642     if (CI->getOperand(0)->getType()->isIntegerTy(8))
2643       ExtTy = Type::getInt64Ty(C);
2644     unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2645                        ExtTy->getPrimitiveSizeInBits();
2646     Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2647     Rep = Builder.CreateVectorSplat(NumElts, Rep);
2648   } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2649     Value *Vec = CI->getArgOperand(0);
2650     Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2651     Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2652     Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2653   } else if (Name.starts_with("avx.sqrt.p") ||
2654              Name.starts_with("sse2.sqrt.p") ||
2655              Name.starts_with("sse.sqrt.p")) {
2656     Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2657                                   {CI->getArgOperand(0)});
  } else if (Name.starts_with("avx512.mask.sqrt.p")) {
    // The 4-argument form carries a rounding-mode immediate in operand 3.
    // Only keep a target-specific 512-bit intrinsic when that immediate is
    // present and is not 4 (presumably _MM_FROUND_CUR_DIRECTION, i.e. default
    // rounding -- the same "!= 4" check is used by the cvt upgrades below).
    if (CI->arg_size() == 4 &&
        (!isa<ConstantInt>(CI->getArgOperand(3)) ||
         cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      // "avx512.mask.sqrt.p" is 18 characters, so Name[18] distinguishes the
      // "ps" variant ('s') from the "pd" variant.
      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                          : Intrinsic::x86_avx512_sqrt_pd_512;

      Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
      Rep = Builder.CreateIntrinsic(IID, Args);
    } else {
      // Default rounding: the generic llvm.sqrt intrinsic is equivalent.
      Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                    {CI->getArgOperand(0)});
    }
    // Apply the write-mask: operand 2 is the mask, operand 1 the passthru.
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2673   } else if (Name.starts_with("avx512.ptestm") ||
2674              Name.starts_with("avx512.ptestnm")) {
2675     Value *Op0 = CI->getArgOperand(0);
2676     Value *Op1 = CI->getArgOperand(1);
2677     Value *Mask = CI->getArgOperand(2);
2678     Rep = Builder.CreateAnd(Op0, Op1);
2679     llvm::Type *Ty = Op0->getType();
2680     Value *Zero = llvm::Constant::getNullValue(Ty);
2681     ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2682                                    ? ICmpInst::ICMP_NE
2683                                    : ICmpInst::ICMP_EQ;
2684     Rep = Builder.CreateICmp(Pred, Rep, Zero);
2685     Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2686   } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2687     unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2688                            ->getNumElements();
2689     Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2690     Rep =
2691         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2692   } else if (Name.starts_with("avx512.kunpck")) {
2693     unsigned NumElts = CI->getType()->getScalarSizeInBits();
2694     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2695     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2696     int Indices[64];
2697     for (unsigned i = 0; i != NumElts; ++i)
2698       Indices[i] = i;
2699 
2700     // First extract half of each vector. This gives better codegen than
2701     // doing it in a single shuffle.
2702     LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2703     RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2704     // Concat the vectors.
2705     // NOTE: Operands have to be swapped to match intrinsic definition.
2706     Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2707     Rep = Builder.CreateBitCast(Rep, CI->getType());
2708   } else if (Name == "avx512.kand.w") {
2709     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2710     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2711     Rep = Builder.CreateAnd(LHS, RHS);
2712     Rep = Builder.CreateBitCast(Rep, CI->getType());
2713   } else if (Name == "avx512.kandn.w") {
2714     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2715     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2716     LHS = Builder.CreateNot(LHS);
2717     Rep = Builder.CreateAnd(LHS, RHS);
2718     Rep = Builder.CreateBitCast(Rep, CI->getType());
2719   } else if (Name == "avx512.kor.w") {
2720     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2721     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2722     Rep = Builder.CreateOr(LHS, RHS);
2723     Rep = Builder.CreateBitCast(Rep, CI->getType());
2724   } else if (Name == "avx512.kxor.w") {
2725     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2726     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2727     Rep = Builder.CreateXor(LHS, RHS);
2728     Rep = Builder.CreateBitCast(Rep, CI->getType());
2729   } else if (Name == "avx512.kxnor.w") {
2730     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2731     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2732     LHS = Builder.CreateNot(LHS);
2733     Rep = Builder.CreateXor(LHS, RHS);
2734     Rep = Builder.CreateBitCast(Rep, CI->getType());
2735   } else if (Name == "avx512.knot.w") {
2736     Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2737     Rep = Builder.CreateNot(Rep);
2738     Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
    // kortest{z,c}.w: OR the two 16-bit masks, then compare the result with
    // zero (kortestz: "is the OR all-zero?") or all-ones (kortestc: "is the
    // OR all-ones?") and return the i1 answer zero-extended to i32.
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
    // NOTE: this local shadows the LLVMContext `C` used elsewhere in this
    // function.
    Value *C;
    // "avx512.kortest" is 14 characters, so Name[14] is 'z' or 'c'.
    if (Name[14] == 'c')
      C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
    else
      C = ConstantInt::getNullValue(Builder.getInt16Ty());
    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2751   } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2752              Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2753              Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2754              Name == "sse.div.ss" || Name == "sse2.div.sd") {
2755     Type *I32Ty = Type::getInt32Ty(C);
2756     Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2757                                                ConstantInt::get(I32Ty, 0));
2758     Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2759                                                ConstantInt::get(I32Ty, 0));
2760     Value *EltOp;
2761     if (Name.contains(".add."))
2762       EltOp = Builder.CreateFAdd(Elt0, Elt1);
2763     else if (Name.contains(".sub."))
2764       EltOp = Builder.CreateFSub(Elt0, Elt1);
2765     else if (Name.contains(".mul."))
2766       EltOp = Builder.CreateFMul(Elt0, Elt1);
2767     else
2768       EltOp = Builder.CreateFDiv(Elt0, Elt1);
2769     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2770                                       ConstantInt::get(I32Ty, 0));
2771   } else if (Name.starts_with("avx512.mask.pcmp")) {
2772     // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2773     bool CmpEq = Name[16] == 'e';
2774     Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2775   } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2776     Type *OpTy = CI->getArgOperand(0)->getType();
2777     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2778     Intrinsic::ID IID;
2779     switch (VecWidth) {
2780     default:
2781       llvm_unreachable("Unexpected intrinsic");
2782     case 128:
2783       IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2784       break;
2785     case 256:
2786       IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2787       break;
2788     case 512:
2789       IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2790       break;
2791     }
2792 
2793     Rep =
2794         Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2795     Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2796   } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2797     Type *OpTy = CI->getArgOperand(0)->getType();
2798     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2799     unsigned EltWidth = OpTy->getScalarSizeInBits();
2800     Intrinsic::ID IID;
2801     if (VecWidth == 128 && EltWidth == 32)
2802       IID = Intrinsic::x86_avx512_fpclass_ps_128;
2803     else if (VecWidth == 256 && EltWidth == 32)
2804       IID = Intrinsic::x86_avx512_fpclass_ps_256;
2805     else if (VecWidth == 512 && EltWidth == 32)
2806       IID = Intrinsic::x86_avx512_fpclass_ps_512;
2807     else if (VecWidth == 128 && EltWidth == 64)
2808       IID = Intrinsic::x86_avx512_fpclass_pd_128;
2809     else if (VecWidth == 256 && EltWidth == 64)
2810       IID = Intrinsic::x86_avx512_fpclass_pd_256;
2811     else if (VecWidth == 512 && EltWidth == 64)
2812       IID = Intrinsic::x86_avx512_fpclass_pd_512;
2813     else
2814       llvm_unreachable("Unexpected intrinsic");
2815 
2816     Rep =
2817         Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2818     Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2819   } else if (Name.starts_with("avx512.cmp.p")) {
2820     SmallVector<Value *, 4> Args(CI->args());
2821     Type *OpTy = Args[0]->getType();
2822     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2823     unsigned EltWidth = OpTy->getScalarSizeInBits();
2824     Intrinsic::ID IID;
2825     if (VecWidth == 128 && EltWidth == 32)
2826       IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2827     else if (VecWidth == 256 && EltWidth == 32)
2828       IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2829     else if (VecWidth == 512 && EltWidth == 32)
2830       IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2831     else if (VecWidth == 128 && EltWidth == 64)
2832       IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2833     else if (VecWidth == 256 && EltWidth == 64)
2834       IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2835     else if (VecWidth == 512 && EltWidth == 64)
2836       IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2837     else
2838       llvm_unreachable("Unexpected intrinsic");
2839 
2840     Value *Mask = Constant::getAllOnesValue(CI->getType());
2841     if (VecWidth == 512)
2842       std::swap(Mask, Args.back());
2843     Args.push_back(Mask);
2844 
2845     Rep = Builder.CreateIntrinsic(IID, Args);
2846   } else if (Name.starts_with("avx512.mask.cmp.")) {
2847     // Integer compare intrinsics.
2848     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2849     Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2850   } else if (Name.starts_with("avx512.mask.ucmp.")) {
2851     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2852     Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2853   } else if (Name.starts_with("avx512.cvtb2mask.") ||
2854              Name.starts_with("avx512.cvtw2mask.") ||
2855              Name.starts_with("avx512.cvtd2mask.") ||
2856              Name.starts_with("avx512.cvtq2mask.")) {
2857     Value *Op = CI->getArgOperand(0);
2858     Value *Zero = llvm::Constant::getNullValue(Op->getType());
2859     Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2860     Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2861   } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2862              Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2863              Name.starts_with("avx512.mask.pabs")) {
2864     Rep = upgradeAbs(Builder, *CI);
2865   } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2866              Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2867              Name.starts_with("avx512.mask.pmaxs")) {
2868     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2869   } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2870              Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2871              Name.starts_with("avx512.mask.pmaxu")) {
2872     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2873   } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2874              Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2875              Name.starts_with("avx512.mask.pmins")) {
2876     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2877   } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2878              Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2879              Name.starts_with("avx512.mask.pminu")) {
2880     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2881   } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2882              Name == "avx512.pmulu.dq.512" ||
2883              Name.starts_with("avx512.mask.pmulu.dq.")) {
2884     Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2885   } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2886              Name == "avx512.pmul.dq.512" ||
2887              Name.starts_with("avx512.mask.pmul.dq.")) {
2888     Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2889   } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2890              Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2891     Rep =
2892         Builder.CreateSIToFP(CI->getArgOperand(1),
2893                              cast<VectorType>(CI->getType())->getElementType());
2894     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2895   } else if (Name == "avx512.cvtusi2sd") {
2896     Rep =
2897         Builder.CreateUIToFP(CI->getArgOperand(1),
2898                              cast<VectorType>(CI->getType())->getElementType());
2899     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2900   } else if (Name == "sse2.cvtss2sd") {
2901     Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2902     Rep = Builder.CreateFPExt(
2903         Rep, cast<VectorType>(CI->getType())->getElementType());
2904     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2905   } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2906              Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2907              Name.starts_with("avx512.mask.cvtdq2pd.") ||
2908              Name.starts_with("avx512.mask.cvtudq2pd.") ||
2909              Name.starts_with("avx512.mask.cvtdq2ps.") ||
2910              Name.starts_with("avx512.mask.cvtudq2ps.") ||
2911              Name.starts_with("avx512.mask.cvtqq2pd.") ||
2912              Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2913              Name == "avx512.mask.cvtqq2ps.256" ||
2914              Name == "avx512.mask.cvtqq2ps.512" ||
2915              Name == "avx512.mask.cvtuqq2ps.256" ||
2916              Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2917              Name == "avx.cvt.ps2.pd.256" ||
2918              Name == "avx512.mask.cvtps2pd.128" ||
2919              Name == "avx512.mask.cvtps2pd.256") {
2920     auto *DstTy = cast<FixedVectorType>(CI->getType());
2921     Rep = CI->getArgOperand(0);
2922     auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2923 
2924     unsigned NumDstElts = DstTy->getNumElements();
2925     if (NumDstElts < SrcTy->getNumElements()) {
2926       assert(NumDstElts == 2 && "Unexpected vector size");
2927       Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2928     }
2929 
2930     bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2931     bool IsUnsigned = Name.contains("cvtu");
2932     if (IsPS2PD)
2933       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2934     else if (CI->arg_size() == 4 &&
2935              (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2936               cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2937       Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2938                                      : Intrinsic::x86_avx512_sitofp_round;
2939       Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2940                                     {Rep, CI->getArgOperand(3)});
2941     } else {
2942       Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2943                        : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2944     }
2945 
2946     if (CI->arg_size() >= 3)
2947       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2948                           CI->getArgOperand(1));
2949   } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2950              Name.starts_with("vcvtph2ps.")) {
2951     auto *DstTy = cast<FixedVectorType>(CI->getType());
2952     Rep = CI->getArgOperand(0);
2953     auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2954     unsigned NumDstElts = DstTy->getNumElements();
2955     if (NumDstElts != SrcTy->getNumElements()) {
2956       assert(NumDstElts == 4 && "Unexpected vector size");
2957       Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2958     }
2959     Rep = Builder.CreateBitCast(
2960         Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2961     Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2962     if (CI->arg_size() >= 3)
2963       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2964                           CI->getArgOperand(1));
2965   } else if (Name.starts_with("avx512.mask.load")) {
2966     // "avx512.mask.loadu." or "avx512.mask.load."
2967     bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2968     Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2969                             CI->getArgOperand(2), Aligned);
2970   } else if (Name.starts_with("avx512.mask.expand.load.")) {
2971     auto *ResultTy = cast<FixedVectorType>(CI->getType());
2972     Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2973                                    ResultTy->getNumElements());
2974 
2975     Rep = Builder.CreateIntrinsic(
2976         Intrinsic::masked_expandload, ResultTy,
2977         {CI->getOperand(0), MaskVec, CI->getOperand(1)});
2978   } else if (Name.starts_with("avx512.mask.compress.store.")) {
2979     auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2980     Value *MaskVec =
2981         getX86MaskVec(Builder, CI->getArgOperand(2),
2982                       cast<FixedVectorType>(ResultTy)->getNumElements());
2983 
2984     Rep = Builder.CreateIntrinsic(
2985         Intrinsic::masked_compressstore, ResultTy,
2986         {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
2987   } else if (Name.starts_with("avx512.mask.compress.") ||
2988              Name.starts_with("avx512.mask.expand.")) {
2989     auto *ResultTy = cast<FixedVectorType>(CI->getType());
2990 
2991     Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2992                                    ResultTy->getNumElements());
2993 
2994     bool IsCompress = Name[12] == 'c';
2995     Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2996                                    : Intrinsic::x86_avx512_mask_expand;
2997     Rep = Builder.CreateIntrinsic(
2998         IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2999   } else if (Name.starts_with("xop.vpcom")) {
3000     bool IsSigned;
3001     if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3002         Name.ends_with("uq"))
3003       IsSigned = false;
3004     else if (Name.ends_with("b") || Name.ends_with("w") ||
3005              Name.ends_with("d") || Name.ends_with("q"))
3006       IsSigned = true;
3007     else
3008       llvm_unreachable("Unknown suffix");
3009 
3010     unsigned Imm;
3011     if (CI->arg_size() == 3) {
3012       Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3013     } else {
3014       Name = Name.substr(9); // strip off "xop.vpcom"
3015       if (Name.starts_with("lt"))
3016         Imm = 0;
3017       else if (Name.starts_with("le"))
3018         Imm = 1;
3019       else if (Name.starts_with("gt"))
3020         Imm = 2;
3021       else if (Name.starts_with("ge"))
3022         Imm = 3;
3023       else if (Name.starts_with("eq"))
3024         Imm = 4;
3025       else if (Name.starts_with("ne"))
3026         Imm = 5;
3027       else if (Name.starts_with("false"))
3028         Imm = 6;
3029       else if (Name.starts_with("true"))
3030         Imm = 7;
3031       else
3032         llvm_unreachable("Unknown condition");
3033     }
3034 
3035     Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3036   } else if (Name.starts_with("xop.vpcmov")) {
3037     Value *Sel = CI->getArgOperand(2);
3038     Value *NotSel = Builder.CreateNot(Sel);
3039     Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3040     Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3041     Rep = Builder.CreateOr(Sel0, Sel1);
3042   } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3043              Name.starts_with("avx512.mask.prol")) {
3044     Rep = upgradeX86Rotate(Builder, *CI, false);
3045   } else if (Name.starts_with("avx512.pror") ||
3046              Name.starts_with("avx512.mask.pror")) {
3047     Rep = upgradeX86Rotate(Builder, *CI, true);
  } else if (Name.starts_with("avx512.vpshld.") ||
             Name.starts_with("avx512.mask.vpshld") ||
             Name.starts_with("avx512.maskz.vpshld")) {
    // Concat-shift-left (VPSHLD) upgrades.  Name[11] is 'z' only for the
    // "avx512.maskz." spelling ('.' for "avx512.mask.", 'l' for the unmasked
    // "avx512.vpshld." form), so ZeroMask selects zeroing- vs merge-masking.
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
  } else if (Name.starts_with("avx512.vpshrd.") ||
             Name.starts_with("avx512.mask.vpshrd") ||
             Name.starts_with("avx512.maskz.vpshrd")) {
    // Same as above for the concat-shift-right (VPSHRD) family; Name[11] is
    // 'z' only for the "avx512.maskz." spelling.
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3058   } else if (Name == "sse42.crc32.64.8") {
3059     Value *Trunc0 =
3060         Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3061     Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3062                                   {Trunc0, CI->getArgOperand(1)});
3063     Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3064   } else if (Name.starts_with("avx.vbroadcast.s") ||
3065              Name.starts_with("avx512.vbroadcast.s")) {
3066     // Replace broadcasts with a series of insertelements.
3067     auto *VecTy = cast<FixedVectorType>(CI->getType());
3068     Type *EltTy = VecTy->getElementType();
3069     unsigned EltNum = VecTy->getNumElements();
3070     Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3071     Type *I32Ty = Type::getInt32Ty(C);
3072     Rep = PoisonValue::get(VecTy);
3073     for (unsigned I = 0; I < EltNum; ++I)
3074       Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3075   } else if (Name.starts_with("sse41.pmovsx") ||
3076              Name.starts_with("sse41.pmovzx") ||
3077              Name.starts_with("avx2.pmovsx") ||
3078              Name.starts_with("avx2.pmovzx") ||
3079              Name.starts_with("avx512.mask.pmovsx") ||
3080              Name.starts_with("avx512.mask.pmovzx")) {
3081     auto *DstTy = cast<FixedVectorType>(CI->getType());
3082     unsigned NumDstElts = DstTy->getNumElements();
3083 
3084     // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3085     SmallVector<int, 8> ShuffleMask(NumDstElts);
3086     for (unsigned i = 0; i != NumDstElts; ++i)
3087       ShuffleMask[i] = i;
3088 
3089     Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3090 
3091     bool DoSext = Name.contains("pmovsx");
3092     Rep =
3093         DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3094     // If there are 3 arguments, it's a masked intrinsic so we need a select.
3095     if (CI->arg_size() == 3)
3096       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3097                           CI->getArgOperand(1));
3098   } else if (Name == "avx512.mask.pmov.qd.256" ||
3099              Name == "avx512.mask.pmov.qd.512" ||
3100              Name == "avx512.mask.pmov.wb.256" ||
3101              Name == "avx512.mask.pmov.wb.512") {
3102     Type *Ty = CI->getArgOperand(1)->getType();
3103     Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3104     Rep =
3105         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3106   } else if (Name.starts_with("avx.vbroadcastf128") ||
3107              Name == "avx2.vbroadcasti128") {
3108     // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3109     Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3110     unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3111     auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3112     Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3113     if (NumSrcElts == 2)
3114       Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3115     else
3116       Rep = Builder.CreateShuffleVector(Load,
3117                                         ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3118   } else if (Name.starts_with("avx512.mask.shuf.i") ||
3119              Name.starts_with("avx512.mask.shuf.f")) {
3120     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3121     Type *VT = CI->getType();
3122     unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3123     unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3124     unsigned ControlBitsMask = NumLanes - 1;
3125     unsigned NumControlBits = NumLanes / 2;
3126     SmallVector<int, 8> ShuffleMask(0);
3127 
3128     for (unsigned l = 0; l != NumLanes; ++l) {
3129       unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3130       // We actually need the other source.
3131       if (l >= NumLanes / 2)
3132         LaneMask += NumLanes;
3133       for (unsigned i = 0; i != NumElementsInLane; ++i)
3134         ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3135     }
3136     Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3137                                       CI->getArgOperand(1), ShuffleMask);
3138     Rep =
3139         emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3140   } else if (Name.starts_with("avx512.mask.broadcastf") ||
3141              Name.starts_with("avx512.mask.broadcasti")) {
3142     unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3143                               ->getNumElements();
3144     unsigned NumDstElts =
3145         cast<FixedVectorType>(CI->getType())->getNumElements();
3146 
3147     SmallVector<int, 8> ShuffleMask(NumDstElts);
3148     for (unsigned i = 0; i != NumDstElts; ++i)
3149       ShuffleMask[i] = i % NumSrcElts;
3150 
3151     Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3152                                       CI->getArgOperand(0), ShuffleMask);
3153     Rep =
3154         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3155   } else if (Name.starts_with("avx2.pbroadcast") ||
3156              Name.starts_with("avx2.vbroadcast") ||
3157              Name.starts_with("avx512.pbroadcast") ||
3158              Name.starts_with("avx512.mask.broadcast.s")) {
3159     // Replace vp?broadcasts with a vector shuffle.
3160     Value *Op = CI->getArgOperand(0);
3161     ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3162     Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3163     SmallVector<int, 8> M;
3164     ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3165     Rep = Builder.CreateShuffleVector(Op, M);
3166 
3167     if (CI->arg_size() == 3)
3168       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3169                           CI->getArgOperand(1));
3170   } else if (Name.starts_with("sse2.padds.") ||
3171              Name.starts_with("avx2.padds.") ||
3172              Name.starts_with("avx512.padds.") ||
3173              Name.starts_with("avx512.mask.padds.")) {
3174     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3175   } else if (Name.starts_with("sse2.psubs.") ||
3176              Name.starts_with("avx2.psubs.") ||
3177              Name.starts_with("avx512.psubs.") ||
3178              Name.starts_with("avx512.mask.psubs.")) {
3179     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3180   } else if (Name.starts_with("sse2.paddus.") ||
3181              Name.starts_with("avx2.paddus.") ||
3182              Name.starts_with("avx512.mask.paddus.")) {
3183     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3184   } else if (Name.starts_with("sse2.psubus.") ||
3185              Name.starts_with("avx2.psubus.") ||
3186              Name.starts_with("avx512.mask.psubus.")) {
3187     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3188   } else if (Name.starts_with("avx512.mask.palignr.")) {
3189     Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3190                                     CI->getArgOperand(1), CI->getArgOperand(2),
3191                                     CI->getArgOperand(3), CI->getArgOperand(4),
3192                                     false);
3193   } else if (Name.starts_with("avx512.mask.valign.")) {
3194     Rep = upgradeX86ALIGNIntrinsics(
3195         Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3196         CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3197   } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3198     // 128/256-bit shift left specified in bits.
3199     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3200     Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3201                                      Shift / 8); // Shift is in bits.
3202   } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3203     // 128/256-bit shift right specified in bits.
3204     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3205     Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3206                                      Shift / 8); // Shift is in bits.
3207   } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3208              Name == "avx512.psll.dq.512") {
3209     // 128/256/512-bit shift left specified in bytes.
3210     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3211     Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3212   } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3213              Name == "avx512.psrl.dq.512") {
3214     // 128/256/512-bit shift right specified in bytes.
3215     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3216     Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3217   } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3218              Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3219              Name.starts_with("avx2.pblendd.")) {
3220     Value *Op0 = CI->getArgOperand(0);
3221     Value *Op1 = CI->getArgOperand(1);
3222     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3223     auto *VecTy = cast<FixedVectorType>(CI->getType());
3224     unsigned NumElts = VecTy->getNumElements();
3225 
3226     SmallVector<int, 16> Idxs(NumElts);
3227     for (unsigned i = 0; i != NumElts; ++i)
3228       Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3229 
3230     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3231   } else if (Name.starts_with("avx.vinsertf128.") ||
3232              Name == "avx2.vinserti128" ||
3233              Name.starts_with("avx512.mask.insert")) {
3234     Value *Op0 = CI->getArgOperand(0);
3235     Value *Op1 = CI->getArgOperand(1);
3236     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3237     unsigned DstNumElts =
3238         cast<FixedVectorType>(CI->getType())->getNumElements();
3239     unsigned SrcNumElts =
3240         cast<FixedVectorType>(Op1->getType())->getNumElements();
3241     unsigned Scale = DstNumElts / SrcNumElts;
3242 
3243     // Mask off the high bits of the immediate value; hardware ignores those.
3244     Imm = Imm % Scale;
3245 
3246     // Extend the second operand into a vector the size of the destination.
3247     SmallVector<int, 8> Idxs(DstNumElts);
3248     for (unsigned i = 0; i != SrcNumElts; ++i)
3249       Idxs[i] = i;
3250     for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3251       Idxs[i] = SrcNumElts;
3252     Rep = Builder.CreateShuffleVector(Op1, Idxs);
3253 
3254     // Insert the second operand into the first operand.
3255 
3256     // Note that there is no guarantee that instruction lowering will actually
3257     // produce a vinsertf128 instruction for the created shuffles. In
3258     // particular, the 0 immediate case involves no lane changes, so it can
3259     // be handled as a blend.
3260 
3261     // Example of shuffle mask for 32-bit elements:
3262     // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
3263     // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
3264 
    // First fill with identity mask.
3266     for (unsigned i = 0; i != DstNumElts; ++i)
3267       Idxs[i] = i;
3268     // Then replace the elements where we need to insert.
3269     for (unsigned i = 0; i != SrcNumElts; ++i)
3270       Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3271     Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3272 
3273     // If the intrinsic has a mask operand, handle that.
3274     if (CI->arg_size() == 5)
3275       Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3276                           CI->getArgOperand(3));
3277   } else if (Name.starts_with("avx.vextractf128.") ||
3278              Name == "avx2.vextracti128" ||
3279              Name.starts_with("avx512.mask.vextract")) {
3280     Value *Op0 = CI->getArgOperand(0);
3281     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3282     unsigned DstNumElts =
3283         cast<FixedVectorType>(CI->getType())->getNumElements();
3284     unsigned SrcNumElts =
3285         cast<FixedVectorType>(Op0->getType())->getNumElements();
3286     unsigned Scale = SrcNumElts / DstNumElts;
3287 
3288     // Mask off the high bits of the immediate value; hardware ignores those.
3289     Imm = Imm % Scale;
3290 
3291     // Get indexes for the subvector of the input vector.
3292     SmallVector<int, 8> Idxs(DstNumElts);
3293     for (unsigned i = 0; i != DstNumElts; ++i) {
3294       Idxs[i] = i + (Imm * DstNumElts);
3295     }
3296     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3297 
3298     // If the intrinsic has a mask operand, handle that.
3299     if (CI->arg_size() == 4)
3300       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3301                           CI->getArgOperand(2));
3302   } else if (Name.starts_with("avx512.mask.perm.df.") ||
3303              Name.starts_with("avx512.mask.perm.di.")) {
3304     Value *Op0 = CI->getArgOperand(0);
3305     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3306     auto *VecTy = cast<FixedVectorType>(CI->getType());
3307     unsigned NumElts = VecTy->getNumElements();
3308 
3309     SmallVector<int, 8> Idxs(NumElts);
3310     for (unsigned i = 0; i != NumElts; ++i)
3311       Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3312 
3313     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3314 
3315     if (CI->arg_size() == 4)
3316       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3317                           CI->getArgOperand(2));
3318   } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3319     // The immediate permute control byte looks like this:
3320     //    [1:0] - select 128 bits from sources for low half of destination
3321     //    [2]   - ignore
3322     //    [3]   - zero low half of destination
3323     //    [5:4] - select 128 bits from sources for high half of destination
3324     //    [6]   - ignore
3325     //    [7]   - zero high half of destination
3326 
3327     uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3328 
3329     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3330     unsigned HalfSize = NumElts / 2;
3331     SmallVector<int, 8> ShuffleMask(NumElts);
3332 
3333     // Determine which operand(s) are actually in use for this instruction.
3334     Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3335     Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3336 
3337     // If needed, replace operands based on zero mask.
3338     V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3339     V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3340 
3341     // Permute low half of result.
3342     unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3343     for (unsigned i = 0; i < HalfSize; ++i)
3344       ShuffleMask[i] = StartIndex + i;
3345 
3346     // Permute high half of result.
3347     StartIndex = (Imm & 0x10) ? HalfSize : 0;
3348     for (unsigned i = 0; i < HalfSize; ++i)
3349       ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3350 
3351     Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3352 
3353   } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3354              Name.starts_with("avx512.mask.vpermil.p") ||
3355              Name.starts_with("avx512.mask.pshuf.d.")) {
3356     Value *Op0 = CI->getArgOperand(0);
3357     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3358     auto *VecTy = cast<FixedVectorType>(CI->getType());
3359     unsigned NumElts = VecTy->getNumElements();
3360     // Calculate the size of each index in the immediate.
3361     unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3362     unsigned IdxMask = ((1 << IdxSize) - 1);
3363 
3364     SmallVector<int, 8> Idxs(NumElts);
3365     // Lookup the bits for this element, wrapping around the immediate every
3366     // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3367     // to offset by the first index of each group.
3368     for (unsigned i = 0; i != NumElts; ++i)
3369       Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3370 
3371     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3372 
3373     if (CI->arg_size() == 4)
3374       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3375                           CI->getArgOperand(2));
3376   } else if (Name == "sse2.pshufl.w" ||
3377              Name.starts_with("avx512.mask.pshufl.w.")) {
3378     Value *Op0 = CI->getArgOperand(0);
3379     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3380     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3381 
3382     SmallVector<int, 16> Idxs(NumElts);
3383     for (unsigned l = 0; l != NumElts; l += 8) {
3384       for (unsigned i = 0; i != 4; ++i)
3385         Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3386       for (unsigned i = 4; i != 8; ++i)
3387         Idxs[i + l] = i + l;
3388     }
3389 
3390     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3391 
3392     if (CI->arg_size() == 4)
3393       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3394                           CI->getArgOperand(2));
3395   } else if (Name == "sse2.pshufh.w" ||
3396              Name.starts_with("avx512.mask.pshufh.w.")) {
3397     Value *Op0 = CI->getArgOperand(0);
3398     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3399     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3400 
3401     SmallVector<int, 16> Idxs(NumElts);
3402     for (unsigned l = 0; l != NumElts; l += 8) {
3403       for (unsigned i = 0; i != 4; ++i)
3404         Idxs[i + l] = i + l;
3405       for (unsigned i = 0; i != 4; ++i)
3406         Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3407     }
3408 
3409     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3410 
3411     if (CI->arg_size() == 4)
3412       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3413                           CI->getArgOperand(2));
3414   } else if (Name.starts_with("avx512.mask.shuf.p")) {
3415     Value *Op0 = CI->getArgOperand(0);
3416     Value *Op1 = CI->getArgOperand(1);
3417     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3418     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3419 
3420     unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3421     unsigned HalfLaneElts = NumLaneElts / 2;
3422 
3423     SmallVector<int, 16> Idxs(NumElts);
3424     for (unsigned i = 0; i != NumElts; ++i) {
3425       // Base index is the starting element of the lane.
3426       Idxs[i] = i - (i % NumLaneElts);
3427       // If we are half way through the lane switch to the other source.
3428       if ((i % NumLaneElts) >= HalfLaneElts)
3429         Idxs[i] += NumElts;
3430       // Now select the specific element. By adding HalfLaneElts bits from
3431       // the immediate. Wrapping around the immediate every 8-bits.
3432       Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3433     }
3434 
3435     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3436 
3437     Rep =
3438         emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3439   } else if (Name.starts_with("avx512.mask.movddup") ||
3440              Name.starts_with("avx512.mask.movshdup") ||
3441              Name.starts_with("avx512.mask.movsldup")) {
3442     Value *Op0 = CI->getArgOperand(0);
3443     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3444     unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3445 
3446     unsigned Offset = 0;
3447     if (Name.starts_with("avx512.mask.movshdup."))
3448       Offset = 1;
3449 
3450     SmallVector<int, 16> Idxs(NumElts);
3451     for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3452       for (unsigned i = 0; i != NumLaneElts; i += 2) {
3453         Idxs[i + l + 0] = i + l + Offset;
3454         Idxs[i + l + 1] = i + l + Offset;
3455       }
3456 
3457     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3458 
3459     Rep =
3460         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3461   } else if (Name.starts_with("avx512.mask.punpckl") ||
3462              Name.starts_with("avx512.mask.unpckl.")) {
3463     Value *Op0 = CI->getArgOperand(0);
3464     Value *Op1 = CI->getArgOperand(1);
3465     int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3466     int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3467 
3468     SmallVector<int, 64> Idxs(NumElts);
3469     for (int l = 0; l != NumElts; l += NumLaneElts)
3470       for (int i = 0; i != NumLaneElts; ++i)
3471         Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3472 
3473     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3474 
3475     Rep =
3476         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3477   } else if (Name.starts_with("avx512.mask.punpckh") ||
3478              Name.starts_with("avx512.mask.unpckh.")) {
3479     Value *Op0 = CI->getArgOperand(0);
3480     Value *Op1 = CI->getArgOperand(1);
3481     int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3482     int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3483 
3484     SmallVector<int, 64> Idxs(NumElts);
3485     for (int l = 0; l != NumElts; l += NumLaneElts)
3486       for (int i = 0; i != NumLaneElts; ++i)
3487         Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3488 
3489     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3490 
3491     Rep =
3492         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3493   } else if (Name.starts_with("avx512.mask.and.") ||
3494              Name.starts_with("avx512.mask.pand.")) {
3495     VectorType *FTy = cast<VectorType>(CI->getType());
3496     VectorType *ITy = VectorType::getInteger(FTy);
3497     Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3498                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3499     Rep = Builder.CreateBitCast(Rep, FTy);
3500     Rep =
3501         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3502   } else if (Name.starts_with("avx512.mask.andn.") ||
3503              Name.starts_with("avx512.mask.pandn.")) {
3504     VectorType *FTy = cast<VectorType>(CI->getType());
3505     VectorType *ITy = VectorType::getInteger(FTy);
3506     Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3507     Rep = Builder.CreateAnd(Rep,
3508                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3509     Rep = Builder.CreateBitCast(Rep, FTy);
3510     Rep =
3511         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3512   } else if (Name.starts_with("avx512.mask.or.") ||
3513              Name.starts_with("avx512.mask.por.")) {
3514     VectorType *FTy = cast<VectorType>(CI->getType());
3515     VectorType *ITy = VectorType::getInteger(FTy);
3516     Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3517                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3518     Rep = Builder.CreateBitCast(Rep, FTy);
3519     Rep =
3520         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3521   } else if (Name.starts_with("avx512.mask.xor.") ||
3522              Name.starts_with("avx512.mask.pxor.")) {
3523     VectorType *FTy = cast<VectorType>(CI->getType());
3524     VectorType *ITy = VectorType::getInteger(FTy);
3525     Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3526                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3527     Rep = Builder.CreateBitCast(Rep, FTy);
3528     Rep =
3529         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3530   } else if (Name.starts_with("avx512.mask.padd.")) {
3531     Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3532     Rep =
3533         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3534   } else if (Name.starts_with("avx512.mask.psub.")) {
3535     Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3536     Rep =
3537         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3538   } else if (Name.starts_with("avx512.mask.pmull.")) {
3539     Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3540     Rep =
3541         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3542   } else if (Name.starts_with("avx512.mask.add.p")) {
3543     if (Name.ends_with(".512")) {
3544       Intrinsic::ID IID;
3545       if (Name[17] == 's')
3546         IID = Intrinsic::x86_avx512_add_ps_512;
3547       else
3548         IID = Intrinsic::x86_avx512_add_pd_512;
3549 
3550       Rep = Builder.CreateIntrinsic(
3551           IID,
3552           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3553     } else {
3554       Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3555     }
3556     Rep =
3557         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3558   } else if (Name.starts_with("avx512.mask.div.p")) {
3559     if (Name.ends_with(".512")) {
3560       Intrinsic::ID IID;
3561       if (Name[17] == 's')
3562         IID = Intrinsic::x86_avx512_div_ps_512;
3563       else
3564         IID = Intrinsic::x86_avx512_div_pd_512;
3565 
3566       Rep = Builder.CreateIntrinsic(
3567           IID,
3568           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3569     } else {
3570       Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3571     }
3572     Rep =
3573         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3574   } else if (Name.starts_with("avx512.mask.mul.p")) {
3575     if (Name.ends_with(".512")) {
3576       Intrinsic::ID IID;
3577       if (Name[17] == 's')
3578         IID = Intrinsic::x86_avx512_mul_ps_512;
3579       else
3580         IID = Intrinsic::x86_avx512_mul_pd_512;
3581 
3582       Rep = Builder.CreateIntrinsic(
3583           IID,
3584           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3585     } else {
3586       Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3587     }
3588     Rep =
3589         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3590   } else if (Name.starts_with("avx512.mask.sub.p")) {
3591     if (Name.ends_with(".512")) {
3592       Intrinsic::ID IID;
3593       if (Name[17] == 's')
3594         IID = Intrinsic::x86_avx512_sub_ps_512;
3595       else
3596         IID = Intrinsic::x86_avx512_sub_pd_512;
3597 
3598       Rep = Builder.CreateIntrinsic(
3599           IID,
3600           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3601     } else {
3602       Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3603     }
3604     Rep =
3605         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3606   } else if ((Name.starts_with("avx512.mask.max.p") ||
3607               Name.starts_with("avx512.mask.min.p")) &&
3608              Name.drop_front(18) == ".512") {
3609     bool IsDouble = Name[17] == 'd';
3610     bool IsMin = Name[13] == 'i';
3611     static const Intrinsic::ID MinMaxTbl[2][2] = {
3612         {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3613         {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3614     Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3615 
3616     Rep = Builder.CreateIntrinsic(
3617         IID,
3618         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3619     Rep =
3620         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3621   } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3622     Rep =
3623         Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3624                                 {CI->getArgOperand(0), Builder.getInt1(false)});
3625     Rep =
3626         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3627   } else if (Name.starts_with("avx512.mask.psll")) {
3628     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3629     bool IsVariable = Name[16] == 'v';
3630     char Size = Name[16] == '.'   ? Name[17]
3631                 : Name[17] == '.' ? Name[18]
3632                 : Name[18] == '.' ? Name[19]
3633                                   : Name[20];
3634 
3635     Intrinsic::ID IID;
3636     if (IsVariable && Name[17] != '.') {
3637       if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3638         IID = Intrinsic::x86_avx2_psllv_q;
3639       else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3640         IID = Intrinsic::x86_avx2_psllv_q_256;
3641       else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3642         IID = Intrinsic::x86_avx2_psllv_d;
3643       else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3644         IID = Intrinsic::x86_avx2_psllv_d_256;
3645       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3646         IID = Intrinsic::x86_avx512_psllv_w_128;
3647       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3648         IID = Intrinsic::x86_avx512_psllv_w_256;
3649       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3650         IID = Intrinsic::x86_avx512_psllv_w_512;
3651       else
3652         llvm_unreachable("Unexpected size");
3653     } else if (Name.ends_with(".128")) {
3654       if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3655         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3656                           : Intrinsic::x86_sse2_psll_d;
3657       else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3658         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3659                           : Intrinsic::x86_sse2_psll_q;
3660       else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3661         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3662                           : Intrinsic::x86_sse2_psll_w;
3663       else
3664         llvm_unreachable("Unexpected size");
3665     } else if (Name.ends_with(".256")) {
3666       if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3667         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3668                           : Intrinsic::x86_avx2_psll_d;
3669       else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3670         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3671                           : Intrinsic::x86_avx2_psll_q;
3672       else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3673         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3674                           : Intrinsic::x86_avx2_psll_w;
3675       else
3676         llvm_unreachable("Unexpected size");
3677     } else {
3678       if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3679         IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
3680               : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3681                            : Intrinsic::x86_avx512_psll_d_512;
3682       else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3683         IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
3684               : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3685                            : Intrinsic::x86_avx512_psll_q_512;
3686       else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3687         IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3688                           : Intrinsic::x86_avx512_psll_w_512;
3689       else
3690         llvm_unreachable("Unexpected size");
3691     }
3692 
3693     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3694   } else if (Name.starts_with("avx512.mask.psrl")) {
3695     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3696     bool IsVariable = Name[16] == 'v';
3697     char Size = Name[16] == '.'   ? Name[17]
3698                 : Name[17] == '.' ? Name[18]
3699                 : Name[18] == '.' ? Name[19]
3700                                   : Name[20];
3701 
3702     Intrinsic::ID IID;
3703     if (IsVariable && Name[17] != '.') {
3704       if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3705         IID = Intrinsic::x86_avx2_psrlv_q;
3706       else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3707         IID = Intrinsic::x86_avx2_psrlv_q_256;
3708       else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3709         IID = Intrinsic::x86_avx2_psrlv_d;
3710       else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3711         IID = Intrinsic::x86_avx2_psrlv_d_256;
3712       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3713         IID = Intrinsic::x86_avx512_psrlv_w_128;
3714       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3715         IID = Intrinsic::x86_avx512_psrlv_w_256;
3716       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3717         IID = Intrinsic::x86_avx512_psrlv_w_512;
3718       else
3719         llvm_unreachable("Unexpected size");
3720     } else if (Name.ends_with(".128")) {
3721       if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3722         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3723                           : Intrinsic::x86_sse2_psrl_d;
3724       else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3725         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3726                           : Intrinsic::x86_sse2_psrl_q;
3727       else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3728         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3729                           : Intrinsic::x86_sse2_psrl_w;
3730       else
3731         llvm_unreachable("Unexpected size");
3732     } else if (Name.ends_with(".256")) {
3733       if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3734         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3735                           : Intrinsic::x86_avx2_psrl_d;
3736       else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3737         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3738                           : Intrinsic::x86_avx2_psrl_q;
3739       else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3740         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3741                           : Intrinsic::x86_avx2_psrl_w;
3742       else
3743         llvm_unreachable("Unexpected size");
3744     } else {
3745       if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3746         IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
3747               : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3748                            : Intrinsic::x86_avx512_psrl_d_512;
3749       else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3750         IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
3751               : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3752                            : Intrinsic::x86_avx512_psrl_q_512;
3753       else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3754         IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3755                           : Intrinsic::x86_avx512_psrl_w_512;
3756       else
3757         llvm_unreachable("Unexpected size");
3758     }
3759 
3760     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3761   } else if (Name.starts_with("avx512.mask.psra")) {
3762     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3763     bool IsVariable = Name[16] == 'v';
3764     char Size = Name[16] == '.'   ? Name[17]
3765                 : Name[17] == '.' ? Name[18]
3766                 : Name[18] == '.' ? Name[19]
3767                                   : Name[20];
3768 
3769     Intrinsic::ID IID;
3770     if (IsVariable && Name[17] != '.') {
3771       if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3772         IID = Intrinsic::x86_avx2_psrav_d;
3773       else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3774         IID = Intrinsic::x86_avx2_psrav_d_256;
3775       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3776         IID = Intrinsic::x86_avx512_psrav_w_128;
3777       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3778         IID = Intrinsic::x86_avx512_psrav_w_256;
3779       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3780         IID = Intrinsic::x86_avx512_psrav_w_512;
3781       else
3782         llvm_unreachable("Unexpected size");
3783     } else if (Name.ends_with(".128")) {
3784       if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3785         IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3786                           : Intrinsic::x86_sse2_psra_d;
3787       else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3788         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
3789               : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3790                            : Intrinsic::x86_avx512_psra_q_128;
3791       else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3792         IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3793                           : Intrinsic::x86_sse2_psra_w;
3794       else
3795         llvm_unreachable("Unexpected size");
3796     } else if (Name.ends_with(".256")) {
3797       if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3798         IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3799                           : Intrinsic::x86_avx2_psra_d;
3800       else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3801         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
3802               : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3803                            : Intrinsic::x86_avx512_psra_q_256;
3804       else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3805         IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3806                           : Intrinsic::x86_avx2_psra_w;
3807       else
3808         llvm_unreachable("Unexpected size");
3809     } else {
3810       if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3811         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
3812               : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3813                            : Intrinsic::x86_avx512_psra_d_512;
3814       else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3815         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
3816               : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3817                            : Intrinsic::x86_avx512_psra_q_512;
3818       else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3819         IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3820                           : Intrinsic::x86_avx512_psra_w_512;
3821       else
3822         llvm_unreachable("Unexpected size");
3823     }
3824 
3825     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3826   } else if (Name.starts_with("avx512.mask.move.s")) {
3827     Rep = upgradeMaskedMove(Builder, *CI);
3828   } else if (Name.starts_with("avx512.cvtmask2")) {
3829     Rep = upgradeMaskToInt(Builder, *CI);
3830   } else if (Name.ends_with(".movntdqa")) {
3831     MDNode *Node = MDNode::get(
3832         C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3833 
3834     LoadInst *LI = Builder.CreateAlignedLoad(
3835         CI->getType(), CI->getArgOperand(0),
3836         Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3837     LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3838     Rep = LI;
3839   } else if (Name.starts_with("fma.vfmadd.") ||
3840              Name.starts_with("fma.vfmsub.") ||
3841              Name.starts_with("fma.vfnmadd.") ||
3842              Name.starts_with("fma.vfnmsub.")) {
3843     bool NegMul = Name[6] == 'n';
3844     bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3845     bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3846 
3847     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3848                     CI->getArgOperand(2)};
3849 
3850     if (IsScalar) {
3851       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3852       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3853       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3854     }
3855 
3856     if (NegMul && !IsScalar)
3857       Ops[0] = Builder.CreateFNeg(Ops[0]);
3858     if (NegMul && IsScalar)
3859       Ops[1] = Builder.CreateFNeg(Ops[1]);
3860     if (NegAcc)
3861       Ops[2] = Builder.CreateFNeg(Ops[2]);
3862 
3863     Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3864 
3865     if (IsScalar)
3866       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3867   } else if (Name.starts_with("fma4.vfmadd.s")) {
3868     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3869                     CI->getArgOperand(2)};
3870 
3871     Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3872     Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3873     Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3874 
3875     Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3876 
3877     Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3878                                       Rep, (uint64_t)0);
3879   } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3880              Name.starts_with("avx512.maskz.vfmadd.s") ||
3881              Name.starts_with("avx512.mask3.vfmadd.s") ||
3882              Name.starts_with("avx512.mask3.vfmsub.s") ||
3883              Name.starts_with("avx512.mask3.vfnmsub.s")) {
3884     bool IsMask3 = Name[11] == '3';
3885     bool IsMaskZ = Name[11] == 'z';
3886     // Drop the "avx512.mask." to make it easier.
3887     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3888     bool NegMul = Name[2] == 'n';
3889     bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3890 
3891     Value *A = CI->getArgOperand(0);
3892     Value *B = CI->getArgOperand(1);
3893     Value *C = CI->getArgOperand(2);
3894 
3895     if (NegMul && (IsMask3 || IsMaskZ))
3896       A = Builder.CreateFNeg(A);
3897     if (NegMul && !(IsMask3 || IsMaskZ))
3898       B = Builder.CreateFNeg(B);
3899     if (NegAcc)
3900       C = Builder.CreateFNeg(C);
3901 
3902     A = Builder.CreateExtractElement(A, (uint64_t)0);
3903     B = Builder.CreateExtractElement(B, (uint64_t)0);
3904     C = Builder.CreateExtractElement(C, (uint64_t)0);
3905 
3906     if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3907         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3908       Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3909 
3910       Intrinsic::ID IID;
3911       if (Name.back() == 'd')
3912         IID = Intrinsic::x86_avx512_vfmadd_f64;
3913       else
3914         IID = Intrinsic::x86_avx512_vfmadd_f32;
3915       Rep = Builder.CreateIntrinsic(IID, Ops);
3916     } else {
3917       Rep = Builder.CreateFMA(A, B, C);
3918     }
3919 
3920     Value *PassThru = IsMaskZ   ? Constant::getNullValue(Rep->getType())
3921                       : IsMask3 ? C
3922                                 : A;
3923 
3924     // For Mask3 with NegAcc, we need to create a new extractelement that
3925     // avoids the negation above.
3926     if (NegAcc && IsMask3)
3927       PassThru =
3928           Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3929 
3930     Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3931     Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3932                                       (uint64_t)0);
3933   } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3934              Name.starts_with("avx512.mask.vfnmadd.p") ||
3935              Name.starts_with("avx512.mask.vfnmsub.p") ||
3936              Name.starts_with("avx512.mask3.vfmadd.p") ||
3937              Name.starts_with("avx512.mask3.vfmsub.p") ||
3938              Name.starts_with("avx512.mask3.vfnmsub.p") ||
3939              Name.starts_with("avx512.maskz.vfmadd.p")) {
3940     bool IsMask3 = Name[11] == '3';
3941     bool IsMaskZ = Name[11] == 'z';
3942     // Drop the "avx512.mask." to make it easier.
3943     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3944     bool NegMul = Name[2] == 'n';
3945     bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3946 
3947     Value *A = CI->getArgOperand(0);
3948     Value *B = CI->getArgOperand(1);
3949     Value *C = CI->getArgOperand(2);
3950 
3951     if (NegMul && (IsMask3 || IsMaskZ))
3952       A = Builder.CreateFNeg(A);
3953     if (NegMul && !(IsMask3 || IsMaskZ))
3954       B = Builder.CreateFNeg(B);
3955     if (NegAcc)
3956       C = Builder.CreateFNeg(C);
3957 
3958     if (CI->arg_size() == 5 &&
3959         (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3960          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3961       Intrinsic::ID IID;
3962       // Check the character before ".512" in string.
3963       if (Name[Name.size() - 5] == 's')
3964         IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3965       else
3966         IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3967 
3968       Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
3969     } else {
3970       Rep = Builder.CreateFMA(A, B, C);
3971     }
3972 
3973     Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
3974                       : IsMask3 ? CI->getArgOperand(2)
3975                                 : CI->getArgOperand(0);
3976 
3977     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3978   } else if (Name.starts_with("fma.vfmsubadd.p")) {
3979     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3980     unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3981     Intrinsic::ID IID;
3982     if (VecWidth == 128 && EltWidth == 32)
3983       IID = Intrinsic::x86_fma_vfmaddsub_ps;
3984     else if (VecWidth == 256 && EltWidth == 32)
3985       IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3986     else if (VecWidth == 128 && EltWidth == 64)
3987       IID = Intrinsic::x86_fma_vfmaddsub_pd;
3988     else if (VecWidth == 256 && EltWidth == 64)
3989       IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3990     else
3991       llvm_unreachable("Unexpected intrinsic");
3992 
3993     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3994                     CI->getArgOperand(2)};
3995     Ops[2] = Builder.CreateFNeg(Ops[2]);
3996     Rep = Builder.CreateIntrinsic(IID, Ops);
3997   } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3998              Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3999              Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4000              Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4001     bool IsMask3 = Name[11] == '3';
4002     bool IsMaskZ = Name[11] == 'z';
4003     // Drop the "avx512.mask." to make it easier.
4004     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4005     bool IsSubAdd = Name[3] == 's';
4006     if (CI->arg_size() == 5) {
4007       Intrinsic::ID IID;
4008       // Check the character before ".512" in string.
4009       if (Name[Name.size() - 5] == 's')
4010         IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4011       else
4012         IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4013 
4014       Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4015                       CI->getArgOperand(2), CI->getArgOperand(4)};
4016       if (IsSubAdd)
4017         Ops[2] = Builder.CreateFNeg(Ops[2]);
4018 
4019       Rep = Builder.CreateIntrinsic(IID, Ops);
4020     } else {
4021       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4022 
4023       Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4024                       CI->getArgOperand(2)};
4025 
4026       Function *FMA = Intrinsic::getOrInsertDeclaration(
4027           CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4028       Value *Odd = Builder.CreateCall(FMA, Ops);
4029       Ops[2] = Builder.CreateFNeg(Ops[2]);
4030       Value *Even = Builder.CreateCall(FMA, Ops);
4031 
4032       if (IsSubAdd)
4033         std::swap(Even, Odd);
4034 
4035       SmallVector<int, 32> Idxs(NumElts);
4036       for (int i = 0; i != NumElts; ++i)
4037         Idxs[i] = i + (i % 2) * NumElts;
4038 
4039       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4040     }
4041 
4042     Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
4043                       : IsMask3 ? CI->getArgOperand(2)
4044                                 : CI->getArgOperand(0);
4045 
4046     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4047   } else if (Name.starts_with("avx512.mask.pternlog.") ||
4048              Name.starts_with("avx512.maskz.pternlog.")) {
4049     bool ZeroMask = Name[11] == 'z';
4050     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4051     unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4052     Intrinsic::ID IID;
4053     if (VecWidth == 128 && EltWidth == 32)
4054       IID = Intrinsic::x86_avx512_pternlog_d_128;
4055     else if (VecWidth == 256 && EltWidth == 32)
4056       IID = Intrinsic::x86_avx512_pternlog_d_256;
4057     else if (VecWidth == 512 && EltWidth == 32)
4058       IID = Intrinsic::x86_avx512_pternlog_d_512;
4059     else if (VecWidth == 128 && EltWidth == 64)
4060       IID = Intrinsic::x86_avx512_pternlog_q_128;
4061     else if (VecWidth == 256 && EltWidth == 64)
4062       IID = Intrinsic::x86_avx512_pternlog_q_256;
4063     else if (VecWidth == 512 && EltWidth == 64)
4064       IID = Intrinsic::x86_avx512_pternlog_q_512;
4065     else
4066       llvm_unreachable("Unexpected intrinsic");
4067 
4068     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4069                      CI->getArgOperand(2), CI->getArgOperand(3)};
4070     Rep = Builder.CreateIntrinsic(IID, Args);
4071     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4072                                : CI->getArgOperand(0);
4073     Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4074   } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4075              Name.starts_with("avx512.maskz.vpmadd52")) {
4076     bool ZeroMask = Name[11] == 'z';
4077     bool High = Name[20] == 'h' || Name[21] == 'h';
4078     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4079     Intrinsic::ID IID;
4080     if (VecWidth == 128 && !High)
4081       IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4082     else if (VecWidth == 256 && !High)
4083       IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4084     else if (VecWidth == 512 && !High)
4085       IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4086     else if (VecWidth == 128 && High)
4087       IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4088     else if (VecWidth == 256 && High)
4089       IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4090     else if (VecWidth == 512 && High)
4091       IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4092     else
4093       llvm_unreachable("Unexpected intrinsic");
4094 
4095     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4096                      CI->getArgOperand(2)};
4097     Rep = Builder.CreateIntrinsic(IID, Args);
4098     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4099                                : CI->getArgOperand(0);
4100     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4101   } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4102              Name.starts_with("avx512.mask.vpermt2var.") ||
4103              Name.starts_with("avx512.maskz.vpermt2var.")) {
4104     bool ZeroMask = Name[11] == 'z';
4105     bool IndexForm = Name[17] == 'i';
4106     Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4107   } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4108              Name.starts_with("avx512.maskz.vpdpbusd.") ||
4109              Name.starts_with("avx512.mask.vpdpbusds.") ||
4110              Name.starts_with("avx512.maskz.vpdpbusds.")) {
4111     bool ZeroMask = Name[11] == 'z';
4112     bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4113     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4114     Intrinsic::ID IID;
4115     if (VecWidth == 128 && !IsSaturating)
4116       IID = Intrinsic::x86_avx512_vpdpbusd_128;
4117     else if (VecWidth == 256 && !IsSaturating)
4118       IID = Intrinsic::x86_avx512_vpdpbusd_256;
4119     else if (VecWidth == 512 && !IsSaturating)
4120       IID = Intrinsic::x86_avx512_vpdpbusd_512;
4121     else if (VecWidth == 128 && IsSaturating)
4122       IID = Intrinsic::x86_avx512_vpdpbusds_128;
4123     else if (VecWidth == 256 && IsSaturating)
4124       IID = Intrinsic::x86_avx512_vpdpbusds_256;
4125     else if (VecWidth == 512 && IsSaturating)
4126       IID = Intrinsic::x86_avx512_vpdpbusds_512;
4127     else
4128       llvm_unreachable("Unexpected intrinsic");
4129 
4130     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4131                      CI->getArgOperand(2)};
4132     Rep = Builder.CreateIntrinsic(IID, Args);
4133     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4134                                : CI->getArgOperand(0);
4135     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4136   } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4137              Name.starts_with("avx512.maskz.vpdpwssd.") ||
4138              Name.starts_with("avx512.mask.vpdpwssds.") ||
4139              Name.starts_with("avx512.maskz.vpdpwssds.")) {
4140     bool ZeroMask = Name[11] == 'z';
4141     bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4142     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4143     Intrinsic::ID IID;
4144     if (VecWidth == 128 && !IsSaturating)
4145       IID = Intrinsic::x86_avx512_vpdpwssd_128;
4146     else if (VecWidth == 256 && !IsSaturating)
4147       IID = Intrinsic::x86_avx512_vpdpwssd_256;
4148     else if (VecWidth == 512 && !IsSaturating)
4149       IID = Intrinsic::x86_avx512_vpdpwssd_512;
4150     else if (VecWidth == 128 && IsSaturating)
4151       IID = Intrinsic::x86_avx512_vpdpwssds_128;
4152     else if (VecWidth == 256 && IsSaturating)
4153       IID = Intrinsic::x86_avx512_vpdpwssds_256;
4154     else if (VecWidth == 512 && IsSaturating)
4155       IID = Intrinsic::x86_avx512_vpdpwssds_512;
4156     else
4157       llvm_unreachable("Unexpected intrinsic");
4158 
4159     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4160                      CI->getArgOperand(2)};
4161     Rep = Builder.CreateIntrinsic(IID, Args);
4162     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4163                                : CI->getArgOperand(0);
4164     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4165   } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4166              Name == "addcarry.u32" || Name == "addcarry.u64" ||
4167              Name == "subborrow.u32" || Name == "subborrow.u64") {
4168     Intrinsic::ID IID;
4169     if (Name[0] == 'a' && Name.back() == '2')
4170       IID = Intrinsic::x86_addcarry_32;
4171     else if (Name[0] == 'a' && Name.back() == '4')
4172       IID = Intrinsic::x86_addcarry_64;
4173     else if (Name[0] == 's' && Name.back() == '2')
4174       IID = Intrinsic::x86_subborrow_32;
4175     else if (Name[0] == 's' && Name.back() == '4')
4176       IID = Intrinsic::x86_subborrow_64;
4177     else
4178       llvm_unreachable("Unexpected intrinsic");
4179 
4180     // Make a call with 3 operands.
4181     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4182                      CI->getArgOperand(2)};
4183     Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4184 
4185     // Extract the second result and store it.
4186     Value *Data = Builder.CreateExtractValue(NewCall, 1);
4187     Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4188     // Replace the original call result with the first result of the new call.
4189     Value *CF = Builder.CreateExtractValue(NewCall, 0);
4190 
4191     CI->replaceAllUsesWith(CF);
4192     Rep = nullptr;
4193   } else if (Name.starts_with("avx512.mask.") &&
4194              upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4195     // Rep will be updated by the call in the condition.
4196   }
4197 
4198   return Rep;
4199 }
4200 
upgradeAArch64IntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)4201 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4202                                           Function *F, IRBuilder<> &Builder) {
4203   if (Name.starts_with("neon.bfcvt")) {
4204     if (Name.starts_with("neon.bfcvtn2")) {
4205       SmallVector<int, 32> LoMask(4);
4206       std::iota(LoMask.begin(), LoMask.end(), 0);
4207       SmallVector<int, 32> ConcatMask(8);
4208       std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4209       Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4210       Value *Trunc =
4211           Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4212       return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4213     } else if (Name.starts_with("neon.bfcvtn")) {
4214       SmallVector<int, 32> ConcatMask(8);
4215       std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4216       Type *V4BF16 =
4217           FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4218       Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4219       dbgs() << "Trunc: " << *Trunc << "\n";
4220       return Builder.CreateShuffleVector(
4221           Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4222     } else {
4223       return Builder.CreateFPTrunc(CI->getOperand(0),
4224                                    Type::getBFloatTy(F->getContext()));
4225     }
4226   } else if (Name.starts_with("sve.fcvt")) {
4227     Intrinsic::ID NewID =
4228         StringSwitch<Intrinsic::ID>(Name)
4229             .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4230             .Case("sve.fcvtnt.bf16f32",
4231                   Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4232             .Default(Intrinsic::not_intrinsic);
4233     if (NewID == Intrinsic::not_intrinsic)
4234       llvm_unreachable("Unhandled Intrinsic!");
4235 
4236     SmallVector<Value *, 3> Args(CI->args());
4237 
4238     // The original intrinsics incorrectly used a predicate based on the
4239     // smallest element type rather than the largest.
4240     Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4241     Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4242 
4243     if (Args[1]->getType() != BadPredTy)
4244       llvm_unreachable("Unexpected predicate type!");
4245 
4246     Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4247                                       BadPredTy, Args[1]);
4248     Args[1] = Builder.CreateIntrinsic(
4249         Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4250 
4251     return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4252                                    CI->getName());
4253   }
4254 
4255   llvm_unreachable("Unhandled Intrinsic!");
4256 }
4257 
/// Upgrade deprecated ARM MVE/CDE intrinsic calls that used a v4i1 predicate
/// for 64-bit element operations to the current v2i1-predicated forms.
/// \p Name - Intrinsic name with the "llvm.arm." prefix already removed.
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    // Round-trip v2i1 -> i32 -> v4i1 so the result matches the old return
    // type that existing callers expect.
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // These intrinsics now use a v2i1 predicate overload. Rebuild the
    // overloaded-type list, substituting v2i1 for the old v4i1.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    // Each case lists this intrinsic's overloaded types in declaration order,
    // with the trailing predicate replaced by v2i1.
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Convert any i1-vector (predicate) argument from v4i1 to v2i1 via an
    // i32 round-trip; all other arguments pass through unchanged.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    // Re-emit the same intrinsic ID with the corrected overload types.
    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4342 
// Upgrade deprecated AMDGCN atomic intrinsics to native atomicrmw
// instructions. These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
//
// Returns nullptr (leaving the call alone) when the call is malformed, so
// that bad bitcode does not crash the upgrade.
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
                                         Function *F, IRBuilder<> &Builder) {
  // Map the intrinsic name (prefix already stripped to e.g. "ds.fadd") to
  // the equivalent atomicrmw operation. Callers only pass names matched by
  // one of these prefixes.
  AtomicRMWInst::BinOp RMWOp =
      StringSwitch<AtomicRMWInst::BinOp>(Name)
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
          .StartsWith("ds.fmin", AtomicRMWInst::FMin)
          .StartsWith("ds.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
          .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
          .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
          .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);

  // NB: getNumOperands() counts the callee too, so "< 3" means fewer than
  // two actual arguments (ptr, value) -- malformed bitcode.
  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    // Treat a non-constant volatile argument conservatively as volatile.
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  // Default to seq_cst; also promote orderings atomicrmw cannot express
  // (not-atomic / unordered) to seq_cst.
  AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
  if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::SequentiallyConsistent;

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    // The old intrinsics assumed these properties; encode them as metadata so
    // codegen can still select the plain instruction.
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    // Flat atomics never targeted private memory; record that the pointer
    // cannot alias the private address space.
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
        MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
                        APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  // Cast back in case Val was bitcast to <2 x bfloat> above.
  return Builder.CreateBitCast(RMW, RetTy);
}
4437 
4438 /// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4439 /// plain MDNode, as it's the verifier's job to check these are the correct
4440 /// types later.
unwrapMAVOp(CallBase * CI,unsigned Op)4441 static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4442   if (Op < CI->arg_size()) {
4443     if (MetadataAsValue *MAV =
4444             dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4445       Metadata *MD = MAV->getMetadata();
4446       return dyn_cast_if_present<MDNode>(MD);
4447     }
4448   }
4449   return nullptr;
4450 }
4451 
4452 /// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
unwrapMAVMetadataOp(CallBase * CI,unsigned Op)4453 static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4454   if (Op < CI->arg_size())
4455     if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4456       return MAV->getMetadata();
4457   return nullptr;
4458 }
4459 
getDebugLocSafe(const Instruction * I)4460 static MDNode *getDebugLocSafe(const Instruction *I) {
4461   // The MDNode attached to this instruction might not be the correct type,
4462   // as the verifier has not yet be run. Fetch it as a bare MDNode.
4463   return I->getDebugLoc().getAsMDNode();
4464 }
4465 
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
///
/// The created record is inserted immediately before \p CI; the caller is
/// responsible for erasing the intrinsic call afterwards. Operands are
/// unwrapped from their MetadataAsValue wrappers and forwarded positionally
/// to the record constructors.
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    // llvm.dbg.label: a single metadata operand.
    DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
                                                        CI->getDebugLoc());
  } else if (Name == "assign") {
    // llvm.dbg.assign: operands 0-5 are forwarded in order to the record
    // constructor.
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVMetadataOp(CI, 4),
        /*The address is a Value ref, it will be stored as a Metadata */
        unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
  } else if (Name == "declare") {
    // llvm.dbg.declare: only location, variable and expression operands; the
    // assign-specific slots are null.
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    MDNode *ExprNode = unwrapMAVOp(CI, 2);
    // Don't try to add something to the expression if it's not an expression.
    // Instead, allow the verifier to fail later.
    if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
      ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
    }
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isZeroValue())
        return;
      // Zero offset: skip over the offset operand when reading var/expr.
      VarOp = 2;
      ExprOp = 3;
    }
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, getDebugLocSafe(CI));
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
4518 
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
///
/// \p CI - The call to upgrade; it is erased (and its uses rewritten) on every
///         path that performs an upgrade.
/// \p NewFn - The replacement intrinsic declaration, or null when the old
///            intrinsic has no direct replacement and must instead be expanded
///            to new IR, converted to a debug record, or dropped.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
  // checks the callee's function type matches. It's likely we need to handle
  // type changes here.
  Function *F = dyn_cast<Function>(CI->getCalledOperand());
  if (!F)
    return;

  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  // No replacement declaration: dispatch on the intrinsic name prefix to a
  // target-specific expansion helper (or the dbg-record upgrade), then replace
  // uses with the expansion result (if any) and delete the call.
  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.consume_front("x86.");
    bool IsNVVM = Name.consume_front("nvvm.");
    bool IsAArch64 = Name.consume_front("aarch64.");
    bool IsARM = Name.consume_front("arm.");
    bool IsAMDGCN = Name.consume_front("amdgcn.");
    bool IsDbg = Name.consume_front("dbg.");
    Value *Rep = nullptr;

    if (!IsX86 && Name == "stackprotectorcheck") {
      // Dropped with no replacement value.
      Rep = nullptr;
    } else if (IsNVVM) {
      Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsX86) {
      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
    } else if (IsAArch64) {
      Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
    } else if (IsARM) {
      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else if (IsDbg) {
      upgradeDbgIntrinsicToDbgRecord(Name, CI);
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  // Shared fallback for intrinsics without a dedicated case below: handle a
  // pure rename, a named-to-literal struct return-type change, or — when
  // neither applies — leave a pointer-cast callee for the verifier to reject.
  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      // Rebuild the old named-struct result element-by-element from the new
      // literal-struct result.
      SmallVector<Value *> Args(CI->args());
      CallInst *NewCI = Builder.CreateCall(NewFn, Args);
      NewCI->setAttributes(CI->getAttributes());
      Value *Res = PoisonValue::get(OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      CI->replaceAllUsesWith(Res);
      CI->eraseFromParent();
      return;
    }

    // We're probably about to produce something invalid. Let the verifier catch
    // it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
    return;
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // Forward all arguments unchanged to the new declaration.
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // Normalize the lane-index operand (arg 3) to an i32 immediate.
    LLVMContext &Ctx = F->getParent()->getContext();
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    // The new intrinsic returns a struct of N scalable vectors; the old one
    // returned a single wide vector. Re-pack the struct elements into the
    // old wide vector type with insert.vector.
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }

  case Intrinsic::coro_end: {
    // Append the new token argument (none).
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    // Only aarch64.sve.tuple.get maps onto vector.extract; translate the
    // tuple element index into an element offset.
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    // aarch64.sve.tuple.set / tuple.createN map onto vector.insert.
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      // Translate the tuple element index into an element offset.
      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      // Concatenate the N argument vectors into one wide result vector.
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }

  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    // Bitcast operands 1 and 2 to the bfloat vector type the new intrinsic
    // expects (operand 0, the accumulator, passes through unchanged).
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // The old form lacked the is_zero_poison flag; default it to false.
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    // Older forms lacked the nullunknown and/or dynamic flags; default any
    // missing one to false.
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
    if (Name.starts_with("dbg.addr")) {
      DIExpression *Expr = cast<DIExpression>(
          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
      NewCall =
          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     MetadataAsValue::get(C, Expr)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    // Nonzero (or non-constant) offset: delete the intrinsic outright.
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    Value *Arg2 = CI->getArgOperand(2);
    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
      return; // Already in the new form; nothing to do.

    // Old i64 overload: narrow the data operands to i32 for the new call.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
                            cast<ConstantInt>(Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    // Widen the i32 result back to the old i64 return type if needed.
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_mapa_shared_cluster: {
    // Create a new call with the correct address space.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
    Value *Res = NewCall;
    Res = Builder.CreateAddrSpaceCast(
        Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
    // Create a new call with the correct address space.
    SmallVector<Value *, 4> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
    SmallVector<Value *, 16> Args(CI->args());

    // Create AddrSpaceCast to shared_cluster if needed.
    // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
    unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
    if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
      Args[0] = Builder.CreateAddrSpaceCast(
          Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    // Attach the flag argument for cta_group, with a
    // default value of 0. This handles case (2) in
    // shouldUpgradeNVPTXTMAG2SIntrinsics().
    size_t NumArgs = CI->arg_size();
    Value *FlagArg = CI->getArgOperand(NumArgs - 3);
    if (!FlagArg->getType()->isIntegerTy(1))
      Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));

    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    if (!CI->getType()->isIntegerTy(64))
      return; // Already the i32 form; nothing to do.

    // Narrow the i64 operand, call the i32 intrinsic, then widen the result.
    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The old form took two operands; only operand 1 survives.
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    // The selector operand (arg 2) changed from a float vector to the
    // equivalent integer vector type; bitcast it.
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take 1 arguments. If we have no arguments, it is already
    // upgraded.
    // NOTE(review): CallBase::getNumOperands() includes the callee operand,
    // so this condition looks unreachable; arg_size() was likely intended.
    // Confirm before changing.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->args());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    // The mask operand (arg 3) changed from a scalar integer to a vector of
    // i1; convert it, then re-pack the i1-vector result for old users.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // These used to produce/consume i16 vectors; the new ones use bfloat
    // vectors, so bitcast at the boundaries.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
    // Operands 1 and 2 changed from i32 vectors to bfloat vectors with twice
    // the element count; bitcast them.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    // Carry the old attributes over, skipping the param slot of the removed
    // alignment argument.
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}
5119 
UpgradeCallsToIntrinsic(Function * F)5120 void llvm::UpgradeCallsToIntrinsic(Function *F) {
5121   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5122 
5123   // Check if this function should be upgraded and get the replacement function
5124   // if there is one.
5125   Function *NewFn;
5126   if (UpgradeIntrinsicFunction(F, NewFn)) {
5127     // Replace all users of the old function with the new function or new
5128     // instructions. This is not a range loop because the call is deleted.
5129     for (User *U : make_early_inc_range(F->users()))
5130       if (CallBase *CB = dyn_cast<CallBase>(U))
5131         UpgradeIntrinsicCall(CB, NewFn);
5132 
5133     // Remove old function, no longer used, from the module.
5134     F->eraseFromParent();
5135   }
5136 }
5137 
UpgradeTBAANode(MDNode & MD)5138 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5139   const unsigned NumOperands = MD.getNumOperands();
5140   if (NumOperands == 0)
5141     return &MD; // Invalid, punt to a verifier error.
5142 
5143   // Check if the tag uses struct-path aware TBAA format.
5144   if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5145     return &MD;
5146 
5147   auto &Context = MD.getContext();
5148   if (NumOperands == 3) {
5149     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5150     MDNode *ScalarType = MDNode::get(Context, Elts);
5151     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5152     Metadata *Elts2[] = {ScalarType, ScalarType,
5153                          ConstantAsMetadata::get(
5154                              Constant::getNullValue(Type::getInt64Ty(Context))),
5155                          MD.getOperand(2)};
5156     return MDNode::get(Context, Elts2);
5157   }
5158   // Create a MDNode <MD, MD, offset 0>
5159   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5160                                     Type::getInt64Ty(Context)))};
5161   return MDNode::get(Context, Elts);
5162 }
5163 
UpgradeBitCastInst(unsigned Opc,Value * V,Type * DestTy,Instruction * & Temp)5164 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5165                                       Instruction *&Temp) {
5166   if (Opc != Instruction::BitCast)
5167     return nullptr;
5168 
5169   Temp = nullptr;
5170   Type *SrcTy = V->getType();
5171   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5172       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5173     LLVMContext &Context = V->getContext();
5174 
5175     // We have no information about target data layout, so we assume that
5176     // the maximum pointer size is 64bit.
5177     Type *MidTy = Type::getInt64Ty(Context);
5178     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5179 
5180     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5181   }
5182 
5183   return nullptr;
5184 }
5185 
UpgradeBitCastExpr(unsigned Opc,Constant * C,Type * DestTy)5186 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5187   if (Opc != Instruction::BitCast)
5188     return nullptr;
5189 
5190   Type *SrcTy = C->getType();
5191   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5192       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5193     LLVMContext &Context = C->getContext();
5194 
5195     // We have no information about target data layout, so we assume that
5196     // the maximum pointer size is 64bit.
5197     Type *MidTy = Type::getInt64Ty(Context);
5198 
5199     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5200                                      DestTy);
5201   }
5202 
5203   return nullptr;
5204 }
5205 
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  // We need to get metadata before the module is verified (i.e., getModuleFlag
  // makes assumptions that we haven't verified yet). Carefully extract the flag
  // from the metadata.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    // Each well-formed module flag is a {behavior, key, value} triple; scan
    // for the "Debug Info Version" key without trusting the node shape.
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
        Version = CI->getZExtValue();
    }
  }

  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    // A module broken outside of debug info is fatal; broken debug info is
    // only diagnosed here and stripped below.
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  // Stale version or malformed debug info: drop all debug info from M.
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
5252 
// Upgrade one dimension (DimC in {'x','y','z'}) of a comma-separated
// 3-element NVVM function attribute (e.g. "nvvm.maxntid") on GV to the
// integer value held in metadata V, preserving any dimensions that are
// already present in the attribute string.
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
                                    GlobalValue *GV, const Metadata *V) {
  Function *F = cast<Function>(GV);

  constexpr StringLiteral DefaultValue = "1";
  StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
  unsigned Length = 0;

  if (F->hasFnAttribute(Attr)) {
    // We expect the existing attribute to have the form "x[,y[,z]]". Here we
    // parse these elements placing them into Vect3
    StringRef S = F->getFnAttribute(Attr).getValueAsString();
    for (; Length < 3 && !S.empty(); Length++) {
      auto [Part, Rest] = S.split(',');
      Vect3[Length] = Part.trim();
      S = Rest;
    }
  }

  const unsigned Dim = DimC - 'x';
  assert(Dim < 3 && "Unexpected dim char");

  const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();

  // local variable required for StringRef in Vect3 to point to.
  const std::string VStr = llvm::utostr(VInt);
  Vect3[Dim] = VStr;
  // Emit at least Dim+1 components so the updated dimension is included even
  // when the pre-existing attribute was shorter.
  Length = std::max(Length, Dim + 1);

  const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
  F->addFnAttr(Attr, NewAttr);
}
5285 
isXYZ(StringRef S)5286 static inline bool isXYZ(StringRef S) {
5287   return S == "x" || S == "y" || S == "z";
5288 }
5289 
// Upgrade a single key/value pair from an !nvvm.annotations entry attached to
// GV into the equivalent calling convention or attribute. Returns true if the
// pair was recognized and upgraded (so the caller may drop it).
bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                        const Metadata *V) {
  if (K == "kernel") {
    // A non-zero "kernel" value marks the function as a PTX kernel.
    if (!mdconst::extract<ConstantInt>(V)->isZero())
      cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in low 16-bits, The index is specified in the high bits. For the
    // index, 0 indicates the return value while higher values correspond to
    // each parameter (idx = param + 1).
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    cast<Function>(GV)->addAttributeAtIndex(
        Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
    return true;
  }
  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
    return true;
  }
  if (K == "minctasm") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
    return true;
  }
  if (K == "maxnreg") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
    return true;
  }
  // Per-dimension annotations ("maxntidx", "reqntidy", "cluster_dim_z", ...)
  // fold into a single comma-separated function attribute per family.
  if (K.consume_front("maxntid") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
    return true;
  }
  if (K.consume_front("reqntid") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
    return true;
  }
  if (K.consume_front("cluster_dim_") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
    return true;
  }

  return false;
}
5340 
/// Convert legacy !nvvm.annotations entries into calling conventions and
/// function attributes, retaining only the key/value pairs that could not be
/// upgraded.
void llvm::UpgradeNVVMAnnotations(Module &M) {
  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
  if (!NamedMD)
    return;

  SmallVector<MDNode *, 8> NewNodes;
  // Process duplicate annotation nodes only once.
  SmallSet<const MDNode *, 8> SeenNodes;
  for (MDNode *MD : NamedMD->operands()) {
    if (!SeenNodes.insert(MD).second)
      continue;

    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
    if (!GV)
      continue;

    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
    // Each nvvm.annotations metadata entry will be of the following form:
    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
    // start index = 1, to skip the global variable key
    // increment = 2, to skip the value for each property-value pairs
    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
      MDString *K = cast<MDString>(MD->getOperand(j));
      const MDOperand &V = MD->getOperand(j + 1);
      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
      if (!Upgraded)
        NewOperands.append({K, V});
    }

    // Keep the node only if some pairs survived the upgrade (size 1 means
    // only the global-value operand is left).
    if (NewOperands.size() > 1)
      NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
  }

  // Rebuild the named metadata from the retained nodes.
  NamedMD->clearOperands();
  for (MDNode *N : NewNodes)
    NamedMD->addOperand(N);
}
5379 
5380 /// This checks for objc retain release marker which should be upgraded. It
5381 /// returns true if module is modified.
upgradeRetainReleaseMarker(Module & M)5382 static bool upgradeRetainReleaseMarker(Module &M) {
5383   bool Changed = false;
5384   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5385   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5386   if (ModRetainReleaseMarker) {
5387     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5388     if (Op) {
5389       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5390       if (ID) {
5391         SmallVector<StringRef, 4> ValueComp;
5392         ID->getString().split(ValueComp, "#");
5393         if (ValueComp.size() == 2) {
5394           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5395           ID = MDString::get(M.getContext(), NewValue);
5396         }
5397         M.addModuleFlag(Module::Error, MarkerKey, ID);
5398         M.eraseNamedMetadata(ModRetainReleaseMarker);
5399         Changed = true;
5400       }
5401     }
5402   }
5403   return Changed;
5404 }
5405 
/// Upgrade calls to Objective-C ARC runtime functions (e.g. "objc_retain")
/// into calls to the corresponding llvm.objc.* intrinsics, removing the old
/// declarations once all their callers have been rewritten.
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);

    // Iterate with early-increment because calls are erased as we go.
    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      // Skip users that only reference Fn (e.g. as a call argument) rather
      // than calling it directly.
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the old declaration once nothing references it any more.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  // Table of legacy runtime function name -> replacement intrinsic.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
5527 
UpgradeModuleFlags(Module & M)5528 bool llvm::UpgradeModuleFlags(Module &M) {
5529   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5530   if (!ModFlags)
5531     return false;
5532 
5533   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5534   bool HasSwiftVersionFlag = false;
5535   uint8_t SwiftMajorVersion, SwiftMinorVersion;
5536   uint32_t SwiftABIVersion;
5537   auto Int8Ty = Type::getInt8Ty(M.getContext());
5538   auto Int32Ty = Type::getInt32Ty(M.getContext());
5539 
5540   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5541     MDNode *Op = ModFlags->getOperand(I);
5542     if (Op->getNumOperands() != 3)
5543       continue;
5544     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5545     if (!ID)
5546       continue;
5547     auto SetBehavior = [&](Module::ModFlagBehavior B) {
5548       Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5549                               Type::getInt32Ty(M.getContext()), B)),
5550                           MDString::get(M.getContext(), ID->getString()),
5551                           Op->getOperand(2)};
5552       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5553       Changed = true;
5554     };
5555 
5556     if (ID->getString() == "Objective-C Image Info Version")
5557       HasObjCFlag = true;
5558     if (ID->getString() == "Objective-C Class Properties")
5559       HasClassProperties = true;
5560     // Upgrade PIC from Error/Max to Min.
5561     if (ID->getString() == "PIC Level") {
5562       if (auto *Behavior =
5563               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5564         uint64_t V = Behavior->getLimitedValue();
5565         if (V == Module::Error || V == Module::Max)
5566           SetBehavior(Module::Min);
5567       }
5568     }
5569     // Upgrade "PIE Level" from Error to Max.
5570     if (ID->getString() == "PIE Level")
5571       if (auto *Behavior =
5572               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5573         if (Behavior->getLimitedValue() == Module::Error)
5574           SetBehavior(Module::Max);
5575 
5576     // Upgrade branch protection and return address signing module flags. The
5577     // module flag behavior for these fields were Error and now they are Min.
5578     if (ID->getString() == "branch-target-enforcement" ||
5579         ID->getString().starts_with("sign-return-address")) {
5580       if (auto *Behavior =
5581               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5582         if (Behavior->getLimitedValue() == Module::Error) {
5583           Type *Int32Ty = Type::getInt32Ty(M.getContext());
5584           Metadata *Ops[3] = {
5585               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5586               Op->getOperand(1), Op->getOperand(2)};
5587           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5588           Changed = true;
5589         }
5590       }
5591     }
5592 
5593     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
5594     // section name so that llvm-lto will not complain about mismatching
5595     // module flags that is functionally the same.
5596     if (ID->getString() == "Objective-C Image Info Section") {
5597       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5598         SmallVector<StringRef, 4> ValueComp;
5599         Value->getString().split(ValueComp, " ");
5600         if (ValueComp.size() != 1) {
5601           std::string NewValue;
5602           for (auto &S : ValueComp)
5603             NewValue += S.str();
5604           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5605                               MDString::get(M.getContext(), NewValue)};
5606           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5607           Changed = true;
5608         }
5609       }
5610     }
5611 
5612     // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5613     // If the higher bits are set, it adds new module flag for swift info.
5614     if (ID->getString() == "Objective-C Garbage Collection") {
5615       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5616       if (Md) {
5617         assert(Md->getValue() && "Expected non-empty metadata");
5618         auto Type = Md->getValue()->getType();
5619         if (Type == Int8Ty)
5620           continue;
5621         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5622         if ((Val & 0xff) != Val) {
5623           HasSwiftVersionFlag = true;
5624           SwiftABIVersion = (Val & 0xff00) >> 8;
5625           SwiftMajorVersion = (Val & 0xff000000) >> 24;
5626           SwiftMinorVersion = (Val & 0xff0000) >> 16;
5627         }
5628         Metadata *Ops[3] = {
5629           ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5630           Op->getOperand(1),
5631           ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5632         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5633         Changed = true;
5634       }
5635     }
5636 
5637     if (ID->getString() == "amdgpu_code_object_version") {
5638       Metadata *Ops[3] = {
5639           Op->getOperand(0),
5640           MDString::get(M.getContext(), "amdhsa_code_object_version"),
5641           Op->getOperand(2)};
5642       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5643       Changed = true;
5644     }
5645   }
5646 
5647   // "Objective-C Class Properties" is recently added for Objective-C. We
5648   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5649   // flag of value 0, so we can correclty downgrade this flag when trying to
5650   // link an ObjC bitcode without this module flag with an ObjC bitcode with
5651   // this module flag.
5652   if (HasObjCFlag && !HasClassProperties) {
5653     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5654                     (uint32_t)0);
5655     Changed = true;
5656   }
5657 
5658   if (HasSwiftVersionFlag) {
5659     M.addModuleFlag(Module::Error, "Swift ABI Version",
5660                     SwiftABIVersion);
5661     M.addModuleFlag(Module::Error, "Swift Major Version",
5662                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
5663     M.addModuleFlag(Module::Error, "Swift Minor Version",
5664                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
5665     Changed = true;
5666   }
5667 
5668   return Changed;
5669 }
5670 
UpgradeSectionAttributes(Module & M)5671 void llvm::UpgradeSectionAttributes(Module &M) {
5672   auto TrimSpaces = [](StringRef Section) -> std::string {
5673     SmallVector<StringRef, 5> Components;
5674     Section.split(Components, ',');
5675 
5676     SmallString<32> Buffer;
5677     raw_svector_ostream OS(Buffer);
5678 
5679     for (auto Component : Components)
5680       OS << ',' << Component.trim();
5681 
5682     return std::string(OS.str().substr(1));
5683   };
5684 
5685   for (auto &GV : M.globals()) {
5686     if (!GV.hasSection())
5687       continue;
5688 
5689     StringRef Section = GV.getSection();
5690 
5691     if (!Section.starts_with("__DATA, __objc_catlist"))
5692       continue;
5693 
5694     // __DATA, __objc_catlist, regular, no_dead_strip
5695     // __DATA,__objc_catlist,regular,no_dead_strip
5696     GV.setSection(TrimSpaces(Section));
5697   }
5698 }
5699 
namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  // Rewrite a strictfp callsite (in a caller known not to be strictfp) to
  // carry nobuiltin instead.
  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    // Constrained FP intrinsics are strict by definition; leave them alone.
    if (isa<ConstrainedFPIntrinsic>(&Call))
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};

/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;

  // Attach the equivalent per-instruction metadata to every floating-point
  // atomicrmw in the visited function.
  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    if (!RMW.isFloatingPointOperation())
      return;

    MDNode *Empty = MDNode::get(RMW.getContext(), {});
    RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
    RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
    RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
  }
};
} // namespace
5743 
/// Upgrade legacy attributes on F: dangling callsite strictfp, attributes
/// incompatible with the value's type, "implicit-section-name", and
/// "amdgpu-unsafe-fp-atomics".
void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatible attributes from function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(
      F.getReturnType(), F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(
        AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  if (Attribute A = F.getFnAttribute("implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    F.removeFnAttr("implicit-section-name");
  }

  if (!F.empty()) {
    // For some reason this is called twice, and the first time is before any
    // instructions are loaded into the body.

    if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
        A.isValid()) {

      // Only a "true" value translates into per-instruction metadata; the
      // attribute itself is removed in either case.
      if (A.getValueAsBool()) {
        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
        Visitor.visit(F);
      }

      // We will leave behind dead attribute uses on external declarations, but
      // clang never added these to declarations anyway.
      F.removeFnAttr("amdgpu-unsafe-fp-atomics");
    }
  }
}
5785 
isOldLoopArgument(Metadata * MD)5786 static bool isOldLoopArgument(Metadata *MD) {
5787   auto *T = dyn_cast_or_null<MDTuple>(MD);
5788   if (!T)
5789     return false;
5790   if (T->getNumOperands() < 1)
5791     return false;
5792   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5793   if (!S)
5794     return false;
5795   return S->getString().starts_with("llvm.vectorizer.");
5796 }
5797 
upgradeLoopTag(LLVMContext & C,StringRef OldTag)5798 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5799   StringRef OldPrefix = "llvm.vectorizer.";
5800   assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5801 
5802   if (OldTag == "llvm.vectorizer.unroll")
5803     return MDString::get(C, "llvm.loop.interleave.count");
5804 
5805   return MDString::get(
5806       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5807              .str());
5808 }
5809 
upgradeLoopArgument(Metadata * MD)5810 static Metadata *upgradeLoopArgument(Metadata *MD) {
5811   auto *T = dyn_cast_or_null<MDTuple>(MD);
5812   if (!T)
5813     return MD;
5814   if (T->getNumOperands() < 1)
5815     return MD;
5816   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5817   if (!OldTag)
5818     return MD;
5819   if (!OldTag->getString().starts_with("llvm.vectorizer."))
5820     return MD;
5821 
5822   // This has an old tag.  Upgrade it.
5823   SmallVector<Metadata *, 8> Ops;
5824   Ops.reserve(T->getNumOperands());
5825   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5826   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5827     Ops.push_back(T->getOperand(I));
5828 
5829   return MDTuple::get(T->getContext(), Ops);
5830 }
5831 
upgradeInstructionLoopAttachment(MDNode & N)5832 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5833   auto *T = dyn_cast<MDTuple>(&N);
5834   if (!T)
5835     return &N;
5836 
5837   if (none_of(T->operands(), isOldLoopArgument))
5838     return &N;
5839 
5840   SmallVector<Metadata *, 8> Ops;
5841   Ops.reserve(T->getNumOperands());
5842   for (Metadata *MD : T->operands())
5843     Ops.push_back(upgradeLoopArgument(MD));
5844 
5845   return MDTuple::get(T->getContext(), Ops);
5846 }
5847 
/// Upgrade a module's data layout string for the target triple TT, inserting
/// components (address-space sizes, native integer widths, alignments) that
/// newer LLVM versions require. Returns the (possibly unchanged) string.
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
  // the address space of globals to 1. This does not apply to SPIRV Logical.
  if (((T.isAMDGPU() && !T.isAMDGCN()) ||
       (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
      !DL.contains("-G") && !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  std::string Res = DL.str();
  // AMDGCN data layout upgrades.
  if (T.isAMDGCN()) {
    // Define address spaces for constants.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // Add missing non-integral declarations.
    // This goes before adding new address spaces to prevent incoherent string
    // values.
    if (!DL.contains("-ni") && !DL.starts_with("ni"))
      Res.append("-ni:7:8:9");
    // Update ni:7 to ni:7:8:9.
    if (DL.ends_with("ni:7"))
      Res.append(":8:9");
    if (DL.ends_with("ni:7:8"))
      Res.append(":9");

    // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
    // resources) An empty data layout has already been upgraded to G1 by now.
    if (!DL.contains("-p7") && !DL.starts_with("p7"))
      Res.append("-p7:160:256:256:32");
    if (!DL.contains("-p8") && !DL.starts_with("p8"))
      Res.append("-p8:128:128:128:48");
    // An old-style p8 spec appearing mid-string is rewritten in place.
    constexpr StringRef OldP8("-p8:128:128-");
    if (DL.contains(OldP8))
      Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
    if (!DL.contains("-p9") && !DL.starts_with("p9"))
      Res.append("-p9:192:256:256:32");

    return Res;
  }

  // Insert 32/64-bit pointer address spaces (p270/p271/p272) right after the
  // mangling (and optional pointer-size) components, if not already present.
  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add pointer size address
    // spaces to the datalayout.
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(AddrSpaces)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32"
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // Mips64 with o32 ABI did not add "-i128:128".
    // Add "-i128:128"
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(I128)) {
      size_t Pos = Res.find(I64);
      if (Pos != size_t(-1))
        Res.insert(Pos + I64.size(), I128);
    }
    return Res;
  }

  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
5967 
UpgradeAttributes(AttrBuilder & B)5968 void llvm::UpgradeAttributes(AttrBuilder &B) {
5969   StringRef FramePointer;
5970   Attribute A = B.getAttribute("no-frame-pointer-elim");
5971   if (A.isValid()) {
5972     // The value can be "true" or "false".
5973     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5974     B.removeAttribute("no-frame-pointer-elim");
5975   }
5976   if (B.contains("no-frame-pointer-elim-non-leaf")) {
5977     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5978     if (FramePointer != "all")
5979       FramePointer = "non-leaf";
5980     B.removeAttribute("no-frame-pointer-elim-non-leaf");
5981   }
5982   if (!FramePointer.empty())
5983     B.addAttribute("frame-pointer", FramePointer);
5984 
5985   A = B.getAttribute("null-pointer-is-valid");
5986   if (A.isValid()) {
5987     // The value can be "true" or "false".
5988     bool NullPointerIsValid = A.getValueAsString() == "true";
5989     B.removeAttribute("null-pointer-is-valid");
5990     if (NullPointerIsValid)
5991       B.addAttribute(Attribute::NullPointerIsValid);
5992   }
5993 }
5994 
UpgradeOperandBundles(std::vector<OperandBundleDef> & Bundles)5995 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5996   // clang.arc.attachedcall bundles are now required to have an operand.
5997   // If they don't, it's okay to drop them entirely: when there is an operand,
5998   // the "attachedcall" is meaningful and required, but without an operand,
5999   // it's just a marker NOP.  Dropping it merely prevents an optimization.
6000   erase_if(Bundles, [&](OperandBundleDef &OBD) {
6001     return OBD.getTag() == "clang.arc.attachedcall" &&
6002            OBD.inputs().empty();
6003   });
6004 }
6005