xref: /freebsd/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision a0409676120c1e558d0ade943019934e0f15118d)
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/InstVisitor.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/IntrinsicsAArch64.h"
27 #include "llvm/IR/IntrinsicsARM.h"
28 #include "llvm/IR/IntrinsicsX86.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/Verifier.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/Regex.h"
34 #include <cstring>
35 using namespace llvm;
36 
37 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
38 
39 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
40 // changed their type from v4f32 to v2i64.
41 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
42                                   Function *&NewFn) {
43   // Check whether this is an old version of the function, which received
44   // v4f32 arguments.
45   Type *Arg0Type = F->getFunctionType()->getParamType(0);
46   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
47     return false;
48 
49   // Yes, it's old, replace it with new version.
50   rename(F);
51   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
52   return true;
53 }
54 
55 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
56 // arguments have changed their type from i32 to i8.
57 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
58                                              Function *&NewFn) {
59   // Check that the last argument is an i32.
60   Type *LastArgType = F->getFunctionType()->getParamType(
61      F->getFunctionType()->getNumParams() - 1);
62   if (!LastArgType->isIntegerTy(32))
63     return false;
64 
65   // Move this function aside and map down.
66   rename(F);
67   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
68   return true;
69 }
70 
71 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
72   // All of the intrinsics matches below should be marked with which llvm
73   // version started autoupgrading them. At some point in the future we would
74   // like to use this information to remove upgrade code for some older
75   // intrinsics. It is currently undecided how we will determine that future
76   // point.
77   if (Name == "addcarryx.u32" || // Added in 8.0
78       Name == "addcarryx.u64" || // Added in 8.0
79       Name == "addcarry.u32" || // Added in 8.0
80       Name == "addcarry.u64" || // Added in 8.0
81       Name == "subborrow.u32" || // Added in 8.0
82       Name == "subborrow.u64" || // Added in 8.0
83       Name.startswith("sse2.padds.") || // Added in 8.0
84       Name.startswith("sse2.psubs.") || // Added in 8.0
85       Name.startswith("sse2.paddus.") || // Added in 8.0
86       Name.startswith("sse2.psubus.") || // Added in 8.0
87       Name.startswith("avx2.padds.") || // Added in 8.0
88       Name.startswith("avx2.psubs.") || // Added in 8.0
89       Name.startswith("avx2.paddus.") || // Added in 8.0
90       Name.startswith("avx2.psubus.") || // Added in 8.0
91       Name.startswith("avx512.padds.") || // Added in 8.0
92       Name.startswith("avx512.psubs.") || // Added in 8.0
93       Name.startswith("avx512.mask.padds.") || // Added in 8.0
94       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
95       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
96       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
97       Name=="ssse3.pabs.b.128" || // Added in 6.0
98       Name=="ssse3.pabs.w.128" || // Added in 6.0
99       Name=="ssse3.pabs.d.128" || // Added in 6.0
100       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
101       Name.startswith("fma.vfmadd.") || // Added in 7.0
102       Name.startswith("fma.vfmsub.") || // Added in 7.0
103       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
104       Name.startswith("fma.vfnmadd.") || // Added in 7.0
105       Name.startswith("fma.vfnmsub.") || // Added in 7.0
106       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
107       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
108       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
109       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
110       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
111       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
112       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
113       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
114       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
115       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
116       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
117       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
118       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
119       Name.startswith("avx512.kunpck") || //added in 6.0
120       Name.startswith("avx2.pabs.") || // Added in 6.0
121       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
122       Name.startswith("avx512.broadcastm") || // Added in 6.0
123       Name == "sse.sqrt.ss" || // Added in 7.0
124       Name == "sse2.sqrt.sd" || // Added in 7.0
125       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
126       Name.startswith("avx.sqrt.p") || // Added in 7.0
127       Name.startswith("sse2.sqrt.p") || // Added in 7.0
128       Name.startswith("sse.sqrt.p") || // Added in 7.0
129       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
130       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
131       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
132       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
133       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
134       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
135       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
136       Name.startswith("avx.vperm2f128.") || // Added in 6.0
137       Name == "avx2.vperm2i128" || // Added in 6.0
138       Name == "sse.add.ss" || // Added in 4.0
139       Name == "sse2.add.sd" || // Added in 4.0
140       Name == "sse.sub.ss" || // Added in 4.0
141       Name == "sse2.sub.sd" || // Added in 4.0
142       Name == "sse.mul.ss" || // Added in 4.0
143       Name == "sse2.mul.sd" || // Added in 4.0
144       Name == "sse.div.ss" || // Added in 4.0
145       Name == "sse2.div.sd" || // Added in 4.0
146       Name == "sse41.pmaxsb" || // Added in 3.9
147       Name == "sse2.pmaxs.w" || // Added in 3.9
148       Name == "sse41.pmaxsd" || // Added in 3.9
149       Name == "sse2.pmaxu.b" || // Added in 3.9
150       Name == "sse41.pmaxuw" || // Added in 3.9
151       Name == "sse41.pmaxud" || // Added in 3.9
152       Name == "sse41.pminsb" || // Added in 3.9
153       Name == "sse2.pmins.w" || // Added in 3.9
154       Name == "sse41.pminsd" || // Added in 3.9
155       Name == "sse2.pminu.b" || // Added in 3.9
156       Name == "sse41.pminuw" || // Added in 3.9
157       Name == "sse41.pminud" || // Added in 3.9
158       Name == "avx512.kand.w" || // Added in 7.0
159       Name == "avx512.kandn.w" || // Added in 7.0
160       Name == "avx512.knot.w" || // Added in 7.0
161       Name == "avx512.kor.w" || // Added in 7.0
162       Name == "avx512.kxor.w" || // Added in 7.0
163       Name == "avx512.kxnor.w" || // Added in 7.0
164       Name == "avx512.kortestc.w" || // Added in 7.0
165       Name == "avx512.kortestz.w" || // Added in 7.0
166       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
167       Name.startswith("avx2.pmax") || // Added in 3.9
168       Name.startswith("avx2.pmin") || // Added in 3.9
169       Name.startswith("avx512.mask.pmax") || // Added in 4.0
170       Name.startswith("avx512.mask.pmin") || // Added in 4.0
171       Name.startswith("avx2.vbroadcast") || // Added in 3.8
172       Name.startswith("avx2.pbroadcast") || // Added in 3.8
173       Name.startswith("avx.vpermil.") || // Added in 3.1
174       Name.startswith("sse2.pshuf") || // Added in 3.9
175       Name.startswith("avx512.pbroadcast") || // Added in 3.9
176       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
177       Name.startswith("avx512.mask.movddup") || // Added in 3.9
178       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
179       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
180       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
181       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
182       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
183       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
184       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
185       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
186       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
187       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
188       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
189       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
190       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
191       Name.startswith("avx512.mask.pand.") || // Added in 3.9
192       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
193       Name.startswith("avx512.mask.por.") || // Added in 3.9
194       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
195       Name.startswith("avx512.mask.and.") || // Added in 3.9
196       Name.startswith("avx512.mask.andn.") || // Added in 3.9
197       Name.startswith("avx512.mask.or.") || // Added in 3.9
198       Name.startswith("avx512.mask.xor.") || // Added in 3.9
199       Name.startswith("avx512.mask.padd.") || // Added in 4.0
200       Name.startswith("avx512.mask.psub.") || // Added in 4.0
201       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
202       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
203       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
204       Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
205       Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
206       Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
207       Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
208       Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
209       Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
210       Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
211       Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
212       Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
213       Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
214       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
215       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
216       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
217       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
218       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
219       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
220       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
221       Name == "avx512.cvtusi2sd" || // Added in 7.0
222       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
223       Name == "sse2.pmulu.dq" || // Added in 7.0
224       Name == "sse41.pmuldq" || // Added in 7.0
225       Name == "avx2.pmulu.dq" || // Added in 7.0
226       Name == "avx2.pmul.dq" || // Added in 7.0
227       Name == "avx512.pmulu.dq.512" || // Added in 7.0
228       Name == "avx512.pmul.dq.512" || // Added in 7.0
229       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
230       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
231       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
232       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
233       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
234       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
235       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
236       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
237       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
238       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
239       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
240       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
241       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
242       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
243       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
244       Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
245       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
246       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
247       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
248       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
249       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
250       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
251       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
252       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
253       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
254       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
255       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
256       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
257       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
258       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
259       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
260       Name.startswith("avx512.mask.pslli") || // Added in 4.0
261       Name.startswith("avx512.mask.psrai") || // Added in 4.0
262       Name.startswith("avx512.mask.psrli") || // Added in 4.0
263       Name.startswith("avx512.mask.psllv") || // Added in 4.0
264       Name.startswith("avx512.mask.psrav") || // Added in 4.0
265       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
266       Name.startswith("sse41.pmovsx") || // Added in 3.8
267       Name.startswith("sse41.pmovzx") || // Added in 3.9
268       Name.startswith("avx2.pmovsx") || // Added in 3.9
269       Name.startswith("avx2.pmovzx") || // Added in 3.9
270       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
271       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
272       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
273       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
274       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
275       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
276       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
277       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
278       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
279       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
280       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
281       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
282       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
283       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
284       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
285       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
286       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
287       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
288       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
289       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
290       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
291       Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
292       Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
293       Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
294       Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
295       Name.startswith("avx512.vpshld.") || // Added in 8.0
296       Name.startswith("avx512.vpshrd.") || // Added in 8.0
297       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
298       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
299       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
300       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
301       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
302       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
303       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
304       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
305       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
306       Name.startswith("avx512.mask.conflict.") || // Added in 9.0
307       Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
308       Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
309       Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
310       Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
311       Name == "sse.cvtsi2ss" || // Added in 7.0
312       Name == "sse.cvtsi642ss" || // Added in 7.0
313       Name == "sse2.cvtsi2sd" || // Added in 7.0
314       Name == "sse2.cvtsi642sd" || // Added in 7.0
315       Name == "sse2.cvtss2sd" || // Added in 7.0
316       Name == "sse2.cvtdq2pd" || // Added in 3.9
317       Name == "sse2.cvtdq2ps" || // Added in 7.0
318       Name == "sse2.cvtps2pd" || // Added in 3.9
319       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
320       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
321       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
322       Name.startswith("vcvtph2ps.") || // Added in 11.0
323       Name.startswith("avx.vinsertf128.") || // Added in 3.7
324       Name == "avx2.vinserti128" || // Added in 3.7
325       Name.startswith("avx512.mask.insert") || // Added in 4.0
326       Name.startswith("avx.vextractf128.") || // Added in 3.7
327       Name == "avx2.vextracti128" || // Added in 3.7
328       Name.startswith("avx512.mask.vextract") || // Added in 4.0
329       Name.startswith("sse4a.movnt.") || // Added in 3.9
330       Name.startswith("avx.movnt.") || // Added in 3.2
331       Name.startswith("avx512.storent.") || // Added in 3.9
332       Name == "sse41.movntdqa" || // Added in 5.0
333       Name == "avx2.movntdqa" || // Added in 5.0
334       Name == "avx512.movntdqa" || // Added in 5.0
335       Name == "sse2.storel.dq" || // Added in 3.9
336       Name.startswith("sse.storeu.") || // Added in 3.9
337       Name.startswith("sse2.storeu.") || // Added in 3.9
338       Name.startswith("avx.storeu.") || // Added in 3.9
339       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
340       Name.startswith("avx512.mask.store.p") || // Added in 3.9
341       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
342       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
343       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
344       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
345       Name == "avx512.mask.store.ss" || // Added in 7.0
346       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
347       Name.startswith("avx512.mask.load.") || // Added in 3.9
348       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
349       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
350       Name.startswith("avx512.mask.expand.b") || // Added in 9.0
351       Name.startswith("avx512.mask.expand.w") || // Added in 9.0
352       Name.startswith("avx512.mask.expand.d") || // Added in 9.0
353       Name.startswith("avx512.mask.expand.q") || // Added in 9.0
354       Name.startswith("avx512.mask.expand.p") || // Added in 9.0
355       Name.startswith("avx512.mask.compress.b") || // Added in 9.0
356       Name.startswith("avx512.mask.compress.w") || // Added in 9.0
357       Name.startswith("avx512.mask.compress.d") || // Added in 9.0
358       Name.startswith("avx512.mask.compress.q") || // Added in 9.0
359       Name.startswith("avx512.mask.compress.p") || // Added in 9.0
360       Name == "sse42.crc32.64.8" || // Added in 3.4
361       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
362       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
363       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
364       Name.startswith("avx512.mask.valign.") || // Added in 4.0
365       Name.startswith("sse2.psll.dq") || // Added in 3.7
366       Name.startswith("sse2.psrl.dq") || // Added in 3.7
367       Name.startswith("avx2.psll.dq") || // Added in 3.7
368       Name.startswith("avx2.psrl.dq") || // Added in 3.7
369       Name.startswith("avx512.psll.dq") || // Added in 3.9
370       Name.startswith("avx512.psrl.dq") || // Added in 3.9
371       Name == "sse41.pblendw" || // Added in 3.7
372       Name.startswith("sse41.blendp") || // Added in 3.7
373       Name.startswith("avx.blend.p") || // Added in 3.7
374       Name == "avx2.pblendw" || // Added in 3.7
375       Name.startswith("avx2.pblendd.") || // Added in 3.7
376       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
377       Name == "avx2.vbroadcasti128" || // Added in 3.7
378       Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
379       Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
380       Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
381       Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
382       Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
383       Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
384       Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
385       Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
386       Name == "xop.vpcmov" || // Added in 3.8
387       Name == "xop.vpcmov.256" || // Added in 5.0
388       Name.startswith("avx512.mask.move.s") || // Added in 4.0
389       Name.startswith("avx512.cvtmask2") || // Added in 5.0
390       Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
391       Name.startswith("xop.vprot") || // Added in 8.0
392       Name.startswith("avx512.prol") || // Added in 8.0
393       Name.startswith("avx512.pror") || // Added in 8.0
394       Name.startswith("avx512.mask.prorv.") || // Added in 8.0
395       Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
396       Name.startswith("avx512.mask.prolv.") || // Added in 8.0
397       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
398       Name.startswith("avx512.ptestm") || //Added in 6.0
399       Name.startswith("avx512.ptestnm") || //Added in 6.0
400       Name.startswith("avx512.mask.pavg")) // Added in 6.0
401     return true;
402 
403   return false;
404 }
405 
406 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
407                                         Function *&NewFn) {
408   // Only handle intrinsics that start with "x86.".
409   if (!Name.startswith("x86."))
410     return false;
411   // Remove "x86." prefix.
412   Name = Name.substr(4);
413 
414   if (ShouldUpgradeX86Intrinsic(F, Name)) {
415     NewFn = nullptr;
416     return true;
417   }
418 
419   if (Name == "rdtscp") { // Added in 8.0
420     // If this intrinsic has 0 operands, it's the new version.
421     if (F->getFunctionType()->getNumParams() == 0)
422       return false;
423 
424     rename(F);
425     NewFn = Intrinsic::getDeclaration(F->getParent(),
426                                       Intrinsic::x86_rdtscp);
427     return true;
428   }
429 
430   // SSE4.1 ptest functions may have an old signature.
431   if (Name.startswith("sse41.ptest")) { // Added in 3.2
432     if (Name.substr(11) == "c")
433       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
434     if (Name.substr(11) == "z")
435       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
436     if (Name.substr(11) == "nzc")
437       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
438   }
439   // Several blend and other instructions with masks used the wrong number of
440   // bits.
441   if (Name == "sse41.insertps") // Added in 3.6
442     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
443                                             NewFn);
444   if (Name == "sse41.dppd") // Added in 3.6
445     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
446                                             NewFn);
447   if (Name == "sse41.dpps") // Added in 3.6
448     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
449                                             NewFn);
450   if (Name == "sse41.mpsadbw") // Added in 3.6
451     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
452                                             NewFn);
453   if (Name == "avx.dp.ps.256") // Added in 3.6
454     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
455                                             NewFn);
456   if (Name == "avx2.mpsadbw") // Added in 3.6
457     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
458                                             NewFn);
459 
460   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
461   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
462     rename(F);
463     NewFn = Intrinsic::getDeclaration(F->getParent(),
464                                       Intrinsic::x86_xop_vfrcz_ss);
465     return true;
466   }
467   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
468     rename(F);
469     NewFn = Intrinsic::getDeclaration(F->getParent(),
470                                       Intrinsic::x86_xop_vfrcz_sd);
471     return true;
472   }
473   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
474   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
475     auto Idx = F->getFunctionType()->getParamType(2);
476     if (Idx->isFPOrFPVectorTy()) {
477       rename(F);
478       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
479       unsigned EltSize = Idx->getScalarSizeInBits();
480       Intrinsic::ID Permil2ID;
481       if (EltSize == 64 && IdxSize == 128)
482         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
483       else if (EltSize == 32 && IdxSize == 128)
484         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
485       else if (EltSize == 64 && IdxSize == 256)
486         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
487       else
488         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
489       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
490       return true;
491     }
492   }
493 
494   if (Name == "seh.recoverfp") {
495     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
496     return true;
497   }
498 
499   return false;
500 }
501 
502 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
503   assert(F && "Illegal to upgrade a non-existent Function.");
504 
505   // Quickly eliminate it, if it's not a candidate.
506   StringRef Name = F->getName();
507   if (Name.size() <= 8 || !Name.startswith("llvm."))
508     return false;
509   Name = Name.substr(5); // Strip off "llvm."
510 
511   switch (Name[0]) {
512   default: break;
513   case 'a': {
514     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
515       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
516                                         F->arg_begin()->getType());
517       return true;
518     }
519     if (Name.startswith("arm.neon.vclz")) {
520       Type* args[2] = {
521         F->arg_begin()->getType(),
522         Type::getInt1Ty(F->getContext())
523       };
524       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
525       // the end of the name. Change name from llvm.arm.neon.vclz.* to
526       //  llvm.ctlz.*
527       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
528       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
529                                "llvm.ctlz." + Name.substr(14), F->getParent());
530       return true;
531     }
532     if (Name.startswith("arm.neon.vcnt")) {
533       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
534                                         F->arg_begin()->getType());
535       return true;
536     }
537     static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
538     if (vldRegex.match(Name)) {
539       auto fArgs = F->getFunctionType()->params();
540       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
541       // Can't use Intrinsic::getDeclaration here as the return types might
542       // then only be structurally equal.
543       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
544       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
545                                "llvm." + Name + ".p0i8", F->getParent());
546       return true;
547     }
548     static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
549     if (vstRegex.match(Name)) {
550       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
551                                                 Intrinsic::arm_neon_vst2,
552                                                 Intrinsic::arm_neon_vst3,
553                                                 Intrinsic::arm_neon_vst4};
554 
555       static const Intrinsic::ID StoreLaneInts[] = {
556         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
557         Intrinsic::arm_neon_vst4lane
558       };
559 
560       auto fArgs = F->getFunctionType()->params();
561       Type *Tys[] = {fArgs[0], fArgs[1]};
562       if (Name.find("lane") == StringRef::npos)
563         NewFn = Intrinsic::getDeclaration(F->getParent(),
564                                           StoreInts[fArgs.size() - 3], Tys);
565       else
566         NewFn = Intrinsic::getDeclaration(F->getParent(),
567                                           StoreLaneInts[fArgs.size() - 5], Tys);
568       return true;
569     }
570     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
571       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
572       return true;
573     }
574     if (Name.startswith("arm.neon.vqadds.")) {
575       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
576                                         F->arg_begin()->getType());
577       return true;
578     }
579     if (Name.startswith("arm.neon.vqaddu.")) {
580       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
581                                         F->arg_begin()->getType());
582       return true;
583     }
584     if (Name.startswith("arm.neon.vqsubs.")) {
585       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
586                                         F->arg_begin()->getType());
587       return true;
588     }
589     if (Name.startswith("arm.neon.vqsubu.")) {
590       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
591                                         F->arg_begin()->getType());
592       return true;
593     }
594     if (Name.startswith("aarch64.neon.addp")) {
595       if (F->arg_size() != 2)
596         break; // Invalid IR.
597       VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
598       if (Ty && Ty->getElementType()->isFloatingPointTy()) {
599         NewFn = Intrinsic::getDeclaration(F->getParent(),
600                                           Intrinsic::aarch64_neon_faddp, Ty);
601         return true;
602       }
603     }
604     break;
605   }
606 
607   case 'c': {
608     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
609       rename(F);
610       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
611                                         F->arg_begin()->getType());
612       return true;
613     }
614     if (Name.startswith("cttz.") && F->arg_size() == 1) {
615       rename(F);
616       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
617                                         F->arg_begin()->getType());
618       return true;
619     }
620     break;
621   }
622   case 'd': {
623     if (Name == "dbg.value" && F->arg_size() == 4) {
624       rename(F);
625       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
626       return true;
627     }
628     break;
629   }
630   case 'e': {
631     SmallVector<StringRef, 2> Groups;
632     static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
633     if (R.match(Name, &Groups)) {
634       Intrinsic::ID ID = Intrinsic::not_intrinsic;
635       if (Groups[1] == "fadd")
636         ID = Intrinsic::experimental_vector_reduce_v2_fadd;
637       if (Groups[1] == "fmul")
638         ID = Intrinsic::experimental_vector_reduce_v2_fmul;
639 
640       if (ID != Intrinsic::not_intrinsic) {
641         rename(F);
642         auto Args = F->getFunctionType()->params();
643         Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
644         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
645         return true;
646       }
647     }
648     break;
649   }
650   case 'i':
651   case 'l': {
652     bool IsLifetimeStart = Name.startswith("lifetime.start");
653     if (IsLifetimeStart || Name.startswith("invariant.start")) {
654       Intrinsic::ID ID = IsLifetimeStart ?
655         Intrinsic::lifetime_start : Intrinsic::invariant_start;
656       auto Args = F->getFunctionType()->params();
657       Type* ObjectPtr[1] = {Args[1]};
658       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
659         rename(F);
660         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
661         return true;
662       }
663     }
664 
665     bool IsLifetimeEnd = Name.startswith("lifetime.end");
666     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
667       Intrinsic::ID ID = IsLifetimeEnd ?
668         Intrinsic::lifetime_end : Intrinsic::invariant_end;
669 
670       auto Args = F->getFunctionType()->params();
671       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
672       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
673         rename(F);
674         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
675         return true;
676       }
677     }
678     if (Name.startswith("invariant.group.barrier")) {
679       // Rename invariant.group.barrier to launder.invariant.group
680       auto Args = F->getFunctionType()->params();
681       Type* ObjectPtr[1] = {Args[0]};
682       rename(F);
683       NewFn = Intrinsic::getDeclaration(F->getParent(),
684           Intrinsic::launder_invariant_group, ObjectPtr);
685       return true;
686 
687     }
688 
689     break;
690   }
691   case 'm': {
692     if (Name.startswith("masked.load.")) {
693       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
694       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
695         rename(F);
696         NewFn = Intrinsic::getDeclaration(F->getParent(),
697                                           Intrinsic::masked_load,
698                                           Tys);
699         return true;
700       }
701     }
702     if (Name.startswith("masked.store.")) {
703       auto Args = F->getFunctionType()->params();
704       Type *Tys[] = { Args[0], Args[1] };
705       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
706         rename(F);
707         NewFn = Intrinsic::getDeclaration(F->getParent(),
708                                           Intrinsic::masked_store,
709                                           Tys);
710         return true;
711       }
712     }
713     // Renaming gather/scatter intrinsics with no address space overloading
714     // to the new overload which includes an address space
715     if (Name.startswith("masked.gather.")) {
716       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
717       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
718         rename(F);
719         NewFn = Intrinsic::getDeclaration(F->getParent(),
720                                           Intrinsic::masked_gather, Tys);
721         return true;
722       }
723     }
724     if (Name.startswith("masked.scatter.")) {
725       auto Args = F->getFunctionType()->params();
726       Type *Tys[] = {Args[0], Args[1]};
727       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
728         rename(F);
729         NewFn = Intrinsic::getDeclaration(F->getParent(),
730                                           Intrinsic::masked_scatter, Tys);
731         return true;
732       }
733     }
734     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
735     // alignment parameter to embedding the alignment as an attribute of
736     // the pointer args.
737     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
738       rename(F);
739       // Get the types of dest, src, and len
740       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
741       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
742                                         ParamTypes);
743       return true;
744     }
745     if (Name.startswith("memmove.") && F->arg_size() == 5) {
746       rename(F);
747       // Get the types of dest, src, and len
748       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
749       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
750                                         ParamTypes);
751       return true;
752     }
753     if (Name.startswith("memset.") && F->arg_size() == 5) {
754       rename(F);
755       // Get the types of dest, and len
756       const auto *FT = F->getFunctionType();
757       Type *ParamTypes[2] = {
758           FT->getParamType(0), // Dest
759           FT->getParamType(2)  // len
760       };
761       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
762                                         ParamTypes);
763       return true;
764     }
765     break;
766   }
767   case 'n': {
768     if (Name.startswith("nvvm.")) {
769       Name = Name.substr(5);
770 
771       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
772       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
773                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
774                               .Case("clz.i", Intrinsic::ctlz)
775                               .Case("popc.i", Intrinsic::ctpop)
776                               .Default(Intrinsic::not_intrinsic);
777       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
778         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
779                                           {F->getReturnType()});
780         return true;
781       }
782 
783       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
784       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
785       //
786       // TODO: We could add lohi.i2d.
787       bool Expand = StringSwitch<bool>(Name)
788                         .Cases("abs.i", "abs.ll", true)
789                         .Cases("clz.ll", "popc.ll", "h2f", true)
790                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
791                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
792                         .StartsWith("atomic.load.add.f32.p", true)
793                         .StartsWith("atomic.load.add.f64.p", true)
794                         .Default(false);
795       if (Expand) {
796         NewFn = nullptr;
797         return true;
798       }
799     }
800     break;
801   }
802   case 'o':
803     // We only need to change the name to match the mangling including the
804     // address space.
805     if (Name.startswith("objectsize.")) {
806       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
807       if (F->arg_size() == 2 || F->arg_size() == 3 ||
808           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
809         rename(F);
810         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
811                                           Tys);
812         return true;
813       }
814     }
815     break;
816 
817   case 'p':
818     if (Name == "prefetch") {
819       // Handle address space overloading.
820       Type *Tys[] = {F->arg_begin()->getType()};
821       if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
822         rename(F);
823         NewFn =
824             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
825         return true;
826       }
827     }
828     break;
829 
830   case 's':
831     if (Name == "stackprotectorcheck") {
832       NewFn = nullptr;
833       return true;
834     }
835     break;
836 
837   case 'x':
838     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
839       return true;
840   }
841   // Remangle our intrinsic since we upgrade the mangling
842   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
843   if (Result != None) {
844     NewFn = Result.getValue();
845     return true;
846   }
847 
848   //  This may not belong here. This function is effectively being overloaded
849   //  to both detect an intrinsic which needs upgrading, and to provide the
850   //  upgraded form of the intrinsic. We should perhaps have two separate
851   //  functions for this.
852   return false;
853 }
854 
855 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
856   NewFn = nullptr;
857   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
858   assert(F != NewFn && "Intrinsic function upgraded to the same function");
859 
860   // Upgrade intrinsic attributes.  This does not change the function.
861   if (NewFn)
862     F = NewFn;
863   if (Intrinsic::ID id = F->getIntrinsicID())
864     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
865   return Upgraded;
866 }
867 
868 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
869   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
870                           GV->getName() == "llvm.global_dtors")) ||
871       !GV->hasInitializer())
872     return nullptr;
873   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
874   if (!ATy)
875     return nullptr;
876   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
877   if (!STy || STy->getNumElements() != 2)
878     return nullptr;
879 
880   LLVMContext &C = GV->getContext();
881   IRBuilder<> IRB(C);
882   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
883                                IRB.getInt8PtrTy());
884   Constant *Init = GV->getInitializer();
885   unsigned N = Init->getNumOperands();
886   std::vector<Constant *> NewCtors(N);
887   for (unsigned i = 0; i != N; ++i) {
888     auto Ctor = cast<Constant>(Init->getOperand(i));
889     NewCtors[i] = ConstantStruct::get(
890         EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
891         Constant::getNullValue(IRB.getInt8PtrTy()));
892   }
893   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
894 
895   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
896                             NewInit, GV->getName());
897 }
898 
899 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
900 // to byte shuffles.
901 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
902                                          Value *Op, unsigned Shift) {
903   auto *ResultTy = cast<VectorType>(Op->getType());
904   unsigned NumElts = ResultTy->getNumElements() * 8;
905 
906   // Bitcast from a 64-bit element type to a byte element type.
907   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
908   Op = Builder.CreateBitCast(Op, VecTy, "cast");
909 
910   // We'll be shuffling in zeroes.
911   Value *Res = Constant::getNullValue(VecTy);
912 
913   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
914   // we'll just return the zero vector.
915   if (Shift < 16) {
916     int Idxs[64];
917     // 256/512-bit version is split into 2/4 16-byte lanes.
918     for (unsigned l = 0; l != NumElts; l += 16)
919       for (unsigned i = 0; i != 16; ++i) {
920         unsigned Idx = NumElts + i - Shift;
921         if (Idx < NumElts)
922           Idx -= NumElts - 16; // end of lane, switch operand.
923         Idxs[l + i] = Idx + l;
924       }
925 
926     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
927   }
928 
929   // Bitcast back to a 64-bit element type.
930   return Builder.CreateBitCast(Res, ResultTy, "cast");
931 }
932 
933 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
934 // to byte shuffles.
935 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
936                                          unsigned Shift) {
937   auto *ResultTy = cast<VectorType>(Op->getType());
938   unsigned NumElts = ResultTy->getNumElements() * 8;
939 
940   // Bitcast from a 64-bit element type to a byte element type.
941   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
942   Op = Builder.CreateBitCast(Op, VecTy, "cast");
943 
944   // We'll be shuffling in zeroes.
945   Value *Res = Constant::getNullValue(VecTy);
946 
947   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
948   // we'll just return the zero vector.
949   if (Shift < 16) {
950     int Idxs[64];
951     // 256/512-bit version is split into 2/4 16-byte lanes.
952     for (unsigned l = 0; l != NumElts; l += 16)
953       for (unsigned i = 0; i != 16; ++i) {
954         unsigned Idx = i + Shift;
955         if (Idx >= 16)
956           Idx += NumElts - 16; // end of lane, switch operand.
957         Idxs[l + i] = Idx + l;
958       }
959 
960     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
961   }
962 
963   // Bitcast back to a 64-bit element type.
964   return Builder.CreateBitCast(Res, ResultTy, "cast");
965 }
966 
967 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
968                             unsigned NumElts) {
969   llvm::VectorType *MaskTy = FixedVectorType::get(
970       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
971   Mask = Builder.CreateBitCast(Mask, MaskTy);
972 
973   // If we have less than 8 elements, then the starting mask was an i8 and
974   // we need to extract down to the right number of elements.
975   if (NumElts < 8) {
976     int Indices[4];
977     for (unsigned i = 0; i != NumElts; ++i)
978       Indices[i] = i;
979     Mask = Builder.CreateShuffleVector(Mask, Mask,
980                                        makeArrayRef(Indices, NumElts),
981                                        "extract");
982   }
983 
984   return Mask;
985 }
986 
987 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
988                             Value *Op0, Value *Op1) {
989   // If the mask is all ones just emit the first operation.
990   if (const auto *C = dyn_cast<Constant>(Mask))
991     if (C->isAllOnesValue())
992       return Op0;
993 
994   Mask = getX86MaskVec(Builder, Mask,
995                        cast<VectorType>(Op0->getType())->getNumElements());
996   return Builder.CreateSelect(Mask, Op0, Op1);
997 }
998 
999 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1000                                   Value *Op0, Value *Op1) {
1001   // If the mask is all ones just emit the first operation.
1002   if (const auto *C = dyn_cast<Constant>(Mask))
1003     if (C->isAllOnesValue())
1004       return Op0;
1005 
1006   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1007                                       Mask->getType()->getIntegerBitWidth());
1008   Mask = Builder.CreateBitCast(Mask, MaskTy);
1009   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1010   return Builder.CreateSelect(Mask, Op0, Op1);
1011 }
1012 
1013 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1014 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1015 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1016 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1017                                         Value *Op1, Value *Shift,
1018                                         Value *Passthru, Value *Mask,
1019                                         bool IsVALIGN) {
1020   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1021 
1022   unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
1023   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1024   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1025   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1026 
1027   // Mask the immediate for VALIGN.
1028   if (IsVALIGN)
1029     ShiftVal &= (NumElts - 1);
1030 
1031   // If palignr is shifting the pair of vectors more than the size of two
1032   // lanes, emit zero.
1033   if (ShiftVal >= 32)
1034     return llvm::Constant::getNullValue(Op0->getType());
1035 
1036   // If palignr is shifting the pair of input vectors more than one lane,
1037   // but less than two lanes, convert to shifting in zeroes.
1038   if (ShiftVal > 16) {
1039     ShiftVal -= 16;
1040     Op1 = Op0;
1041     Op0 = llvm::Constant::getNullValue(Op0->getType());
1042   }
1043 
1044   int Indices[64];
1045   // 256-bit palignr operates on 128-bit lanes so we need to handle that
1046   for (unsigned l = 0; l < NumElts; l += 16) {
1047     for (unsigned i = 0; i != 16; ++i) {
1048       unsigned Idx = ShiftVal + i;
1049       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1050         Idx += NumElts - 16; // End of lane, switch operand.
1051       Indices[l + i] = Idx + l;
1052     }
1053   }
1054 
1055   Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1056                                              makeArrayRef(Indices, NumElts),
1057                                              "palignr");
1058 
1059   return EmitX86Select(Builder, Mask, Align, Passthru);
1060 }
1061 
1062 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
1063                                           bool ZeroMask, bool IndexForm) {
1064   Type *Ty = CI.getType();
1065   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1066   unsigned EltWidth = Ty->getScalarSizeInBits();
1067   bool IsFloat = Ty->isFPOrFPVectorTy();
1068   Intrinsic::ID IID;
1069   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1070     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1071   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1072     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1073   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1074     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1075   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1076     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1077   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1078     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1079   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1080     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1081   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1082     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1083   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1084     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1085   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1086     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1087   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1088     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1089   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1090     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1091   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1092     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1093   else if (VecWidth == 128 && EltWidth == 16)
1094     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1095   else if (VecWidth == 256 && EltWidth == 16)
1096     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1097   else if (VecWidth == 512 && EltWidth == 16)
1098     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1099   else if (VecWidth == 128 && EltWidth == 8)
1100     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1101   else if (VecWidth == 256 && EltWidth == 8)
1102     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1103   else if (VecWidth == 512 && EltWidth == 8)
1104     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1105   else
1106     llvm_unreachable("Unexpected intrinsic");
1107 
1108   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1109                     CI.getArgOperand(2) };
1110 
1111   // If this isn't index form we need to swap operand 0 and 1.
1112   if (!IndexForm)
1113     std::swap(Args[0], Args[1]);
1114 
1115   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1116                                 Args);
1117   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1118                              : Builder.CreateBitCast(CI.getArgOperand(1),
1119                                                      Ty);
1120   return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1121 }
1122 
1123 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1124                                             bool IsSigned, bool IsAddition) {
1125   Type *Ty = CI.getType();
1126   Value *Op0 = CI.getOperand(0);
1127   Value *Op1 = CI.getOperand(1);
1128 
1129   Intrinsic::ID IID =
1130       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1131                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1132   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1133   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1134 
1135   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1136     Value *VecSrc = CI.getOperand(2);
1137     Value *Mask = CI.getOperand(3);
1138     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1139   }
1140   return Res;
1141 }
1142 
1143 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1144                                bool IsRotateRight) {
1145   Type *Ty = CI.getType();
1146   Value *Src = CI.getArgOperand(0);
1147   Value *Amt = CI.getArgOperand(1);
1148 
1149   // Amount may be scalar immediate, in which case create a splat vector.
1150   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1151   // we only care about the lowest log2 bits anyway.
1152   if (Amt->getType() != Ty) {
1153     unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
1154     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1155     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1156   }
1157 
1158   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1159   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1160   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1161 
1162   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1163     Value *VecSrc = CI.getOperand(2);
1164     Value *Mask = CI.getOperand(3);
1165     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1166   }
1167   return Res;
1168 }
1169 
1170 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1171                               bool IsSigned) {
1172   Type *Ty = CI.getType();
1173   Value *LHS = CI.getArgOperand(0);
1174   Value *RHS = CI.getArgOperand(1);
1175 
1176   CmpInst::Predicate Pred;
1177   switch (Imm) {
1178   case 0x0:
1179     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1180     break;
1181   case 0x1:
1182     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1183     break;
1184   case 0x2:
1185     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1186     break;
1187   case 0x3:
1188     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1189     break;
1190   case 0x4:
1191     Pred = ICmpInst::ICMP_EQ;
1192     break;
1193   case 0x5:
1194     Pred = ICmpInst::ICMP_NE;
1195     break;
1196   case 0x6:
1197     return Constant::getNullValue(Ty); // FALSE
1198   case 0x7:
1199     return Constant::getAllOnesValue(Ty); // TRUE
1200   default:
1201     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1202   }
1203 
1204   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1205   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1206   return Ext;
1207 }
1208 
1209 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1210                                     bool IsShiftRight, bool ZeroMask) {
1211   Type *Ty = CI.getType();
1212   Value *Op0 = CI.getArgOperand(0);
1213   Value *Op1 = CI.getArgOperand(1);
1214   Value *Amt = CI.getArgOperand(2);
1215 
1216   if (IsShiftRight)
1217     std::swap(Op0, Op1);
1218 
1219   // Amount may be scalar immediate, in which case create a splat vector.
1220   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1221   // we only care about the lowest log2 bits anyway.
1222   if (Amt->getType() != Ty) {
1223     unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
1224     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1225     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1226   }
1227 
1228   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1229   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1230   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1231 
1232   unsigned NumArgs = CI.getNumArgOperands();
1233   if (NumArgs >= 4) { // For masked intrinsics.
1234     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1235                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1236                                    CI.getArgOperand(0);
1237     Value *Mask = CI.getOperand(NumArgs - 1);
1238     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1239   }
1240   return Res;
1241 }
1242 
1243 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1244                                  Value *Ptr, Value *Data, Value *Mask,
1245                                  bool Aligned) {
1246   // Cast the pointer to the right type.
1247   Ptr = Builder.CreateBitCast(Ptr,
1248                               llvm::PointerType::getUnqual(Data->getType()));
1249   const Align Alignment =
1250       Aligned
1251           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1252           : Align(1);
1253 
1254   // If the mask is all ones just emit a regular store.
1255   if (const auto *C = dyn_cast<Constant>(Mask))
1256     if (C->isAllOnesValue())
1257       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1258 
1259   // Convert the mask from an integer type to a vector of i1.
1260   unsigned NumElts = cast<VectorType>(Data->getType())->getNumElements();
1261   Mask = getX86MaskVec(Builder, Mask, NumElts);
1262   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1263 }
1264 
1265 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1266                                 Value *Ptr, Value *Passthru, Value *Mask,
1267                                 bool Aligned) {
1268   Type *ValTy = Passthru->getType();
1269   // Cast the pointer to the right type.
1270   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1271   const Align Alignment =
1272       Aligned
1273           ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1274                   8)
1275           : Align(1);
1276 
1277   // If the mask is all ones just emit a regular store.
1278   if (const auto *C = dyn_cast<Constant>(Mask))
1279     if (C->isAllOnesValue())
1280       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1281 
1282   // Convert the mask from an integer type to a vector of i1.
1283   unsigned NumElts = cast<VectorType>(Passthru->getType())->getNumElements();
1284   Mask = getX86MaskVec(Builder, Mask, NumElts);
1285   return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
1286 }
1287 
1288 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1289   Value *Op0 = CI.getArgOperand(0);
1290   llvm::Type *Ty = Op0->getType();
1291   Value *Zero = llvm::Constant::getNullValue(Ty);
1292   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1293   Value *Neg = Builder.CreateNeg(Op0);
1294   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1295 
1296   if (CI.getNumArgOperands() == 3)
1297     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1298 
1299   return Res;
1300 }
1301 
1302 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1303                                ICmpInst::Predicate Pred) {
1304   Value *Op0 = CI.getArgOperand(0);
1305   Value *Op1 = CI.getArgOperand(1);
1306   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1307   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1308 
1309   if (CI.getNumArgOperands() == 4)
1310     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1311 
1312   return Res;
1313 }
1314 
1315 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1316   Type *Ty = CI.getType();
1317 
1318   // Arguments have a vXi32 type so cast to vXi64.
1319   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1320   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1321 
1322   if (IsSigned) {
1323     // Shift left then arithmetic shift right.
1324     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1325     LHS = Builder.CreateShl(LHS, ShiftAmt);
1326     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1327     RHS = Builder.CreateShl(RHS, ShiftAmt);
1328     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1329   } else {
1330     // Clear the upper bits.
1331     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1332     LHS = Builder.CreateAnd(LHS, Mask);
1333     RHS = Builder.CreateAnd(RHS, Mask);
1334   }
1335 
1336   Value *Res = Builder.CreateMul(LHS, RHS);
1337 
1338   if (CI.getNumArgOperands() == 4)
1339     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1340 
1341   return Res;
1342 }
1343 
1344 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1345 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1346                                      Value *Mask) {
1347   unsigned NumElts = cast<VectorType>(Vec->getType())->getNumElements();
1348   if (Mask) {
1349     const auto *C = dyn_cast<Constant>(Mask);
1350     if (!C || !C->isAllOnesValue())
1351       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1352   }
1353 
1354   if (NumElts < 8) {
1355     int Indices[8];
1356     for (unsigned i = 0; i != NumElts; ++i)
1357       Indices[i] = i;
1358     for (unsigned i = NumElts; i != 8; ++i)
1359       Indices[i] = NumElts + i % NumElts;
1360     Vec = Builder.CreateShuffleVector(Vec,
1361                                       Constant::getNullValue(Vec->getType()),
1362                                       Indices);
1363   }
1364   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1365 }
1366 
1367 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1368                                    unsigned CC, bool Signed) {
1369   Value *Op0 = CI.getArgOperand(0);
1370   unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
1371 
1372   Value *Cmp;
1373   if (CC == 3) {
1374     Cmp = Constant::getNullValue(
1375         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1376   } else if (CC == 7) {
1377     Cmp = Constant::getAllOnesValue(
1378         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1379   } else {
1380     ICmpInst::Predicate Pred;
1381     switch (CC) {
1382     default: llvm_unreachable("Unknown condition code");
1383     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1384     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1385     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1386     case 4: Pred = ICmpInst::ICMP_NE;  break;
1387     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1388     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1389     }
1390     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1391   }
1392 
1393   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1394 
1395   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1396 }
1397 
1398 // Replace a masked intrinsic with an older unmasked intrinsic.
1399 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1400                                     Intrinsic::ID IID) {
1401   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1402   Value *Rep = Builder.CreateCall(Intrin,
1403                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1404   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1405 }
1406 
1407 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1408   Value* A = CI.getArgOperand(0);
1409   Value* B = CI.getArgOperand(1);
1410   Value* Src = CI.getArgOperand(2);
1411   Value* Mask = CI.getArgOperand(3);
1412 
1413   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1414   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1415   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1416   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1417   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1418   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1419 }
1420 
1421 
1422 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1423   Value* Op = CI.getArgOperand(0);
1424   Type* ReturnOp = CI.getType();
1425   unsigned NumElts = cast<VectorType>(CI.getType())->getNumElements();
1426   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1427   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1428 }
1429 
1430 // Replace intrinsic with unmasked version and a select.
1431 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1432                                       CallInst &CI, Value *&Rep) {
1433   Name = Name.substr(12); // Remove avx512.mask.
1434 
1435   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1436   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1437   Intrinsic::ID IID;
1438   if (Name.startswith("max.p")) {
1439     if (VecWidth == 128 && EltWidth == 32)
1440       IID = Intrinsic::x86_sse_max_ps;
1441     else if (VecWidth == 128 && EltWidth == 64)
1442       IID = Intrinsic::x86_sse2_max_pd;
1443     else if (VecWidth == 256 && EltWidth == 32)
1444       IID = Intrinsic::x86_avx_max_ps_256;
1445     else if (VecWidth == 256 && EltWidth == 64)
1446       IID = Intrinsic::x86_avx_max_pd_256;
1447     else
1448       llvm_unreachable("Unexpected intrinsic");
1449   } else if (Name.startswith("min.p")) {
1450     if (VecWidth == 128 && EltWidth == 32)
1451       IID = Intrinsic::x86_sse_min_ps;
1452     else if (VecWidth == 128 && EltWidth == 64)
1453       IID = Intrinsic::x86_sse2_min_pd;
1454     else if (VecWidth == 256 && EltWidth == 32)
1455       IID = Intrinsic::x86_avx_min_ps_256;
1456     else if (VecWidth == 256 && EltWidth == 64)
1457       IID = Intrinsic::x86_avx_min_pd_256;
1458     else
1459       llvm_unreachable("Unexpected intrinsic");
1460   } else if (Name.startswith("pshuf.b.")) {
1461     if (VecWidth == 128)
1462       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1463     else if (VecWidth == 256)
1464       IID = Intrinsic::x86_avx2_pshuf_b;
1465     else if (VecWidth == 512)
1466       IID = Intrinsic::x86_avx512_pshuf_b_512;
1467     else
1468       llvm_unreachable("Unexpected intrinsic");
1469   } else if (Name.startswith("pmul.hr.sw.")) {
1470     if (VecWidth == 128)
1471       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1472     else if (VecWidth == 256)
1473       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1474     else if (VecWidth == 512)
1475       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1476     else
1477       llvm_unreachable("Unexpected intrinsic");
1478   } else if (Name.startswith("pmulh.w.")) {
1479     if (VecWidth == 128)
1480       IID = Intrinsic::x86_sse2_pmulh_w;
1481     else if (VecWidth == 256)
1482       IID = Intrinsic::x86_avx2_pmulh_w;
1483     else if (VecWidth == 512)
1484       IID = Intrinsic::x86_avx512_pmulh_w_512;
1485     else
1486       llvm_unreachable("Unexpected intrinsic");
1487   } else if (Name.startswith("pmulhu.w.")) {
1488     if (VecWidth == 128)
1489       IID = Intrinsic::x86_sse2_pmulhu_w;
1490     else if (VecWidth == 256)
1491       IID = Intrinsic::x86_avx2_pmulhu_w;
1492     else if (VecWidth == 512)
1493       IID = Intrinsic::x86_avx512_pmulhu_w_512;
1494     else
1495       llvm_unreachable("Unexpected intrinsic");
1496   } else if (Name.startswith("pmaddw.d.")) {
1497     if (VecWidth == 128)
1498       IID = Intrinsic::x86_sse2_pmadd_wd;
1499     else if (VecWidth == 256)
1500       IID = Intrinsic::x86_avx2_pmadd_wd;
1501     else if (VecWidth == 512)
1502       IID = Intrinsic::x86_avx512_pmaddw_d_512;
1503     else
1504       llvm_unreachable("Unexpected intrinsic");
1505   } else if (Name.startswith("pmaddubs.w.")) {
1506     if (VecWidth == 128)
1507       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1508     else if (VecWidth == 256)
1509       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1510     else if (VecWidth == 512)
1511       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1512     else
1513       llvm_unreachable("Unexpected intrinsic");
1514   } else if (Name.startswith("packsswb.")) {
1515     if (VecWidth == 128)
1516       IID = Intrinsic::x86_sse2_packsswb_128;
1517     else if (VecWidth == 256)
1518       IID = Intrinsic::x86_avx2_packsswb;
1519     else if (VecWidth == 512)
1520       IID = Intrinsic::x86_avx512_packsswb_512;
1521     else
1522       llvm_unreachable("Unexpected intrinsic");
1523   } else if (Name.startswith("packssdw.")) {
1524     if (VecWidth == 128)
1525       IID = Intrinsic::x86_sse2_packssdw_128;
1526     else if (VecWidth == 256)
1527       IID = Intrinsic::x86_avx2_packssdw;
1528     else if (VecWidth == 512)
1529       IID = Intrinsic::x86_avx512_packssdw_512;
1530     else
1531       llvm_unreachable("Unexpected intrinsic");
1532   } else if (Name.startswith("packuswb.")) {
1533     if (VecWidth == 128)
1534       IID = Intrinsic::x86_sse2_packuswb_128;
1535     else if (VecWidth == 256)
1536       IID = Intrinsic::x86_avx2_packuswb;
1537     else if (VecWidth == 512)
1538       IID = Intrinsic::x86_avx512_packuswb_512;
1539     else
1540       llvm_unreachable("Unexpected intrinsic");
1541   } else if (Name.startswith("packusdw.")) {
1542     if (VecWidth == 128)
1543       IID = Intrinsic::x86_sse41_packusdw;
1544     else if (VecWidth == 256)
1545       IID = Intrinsic::x86_avx2_packusdw;
1546     else if (VecWidth == 512)
1547       IID = Intrinsic::x86_avx512_packusdw_512;
1548     else
1549       llvm_unreachable("Unexpected intrinsic");
1550   } else if (Name.startswith("vpermilvar.")) {
1551     if (VecWidth == 128 && EltWidth == 32)
1552       IID = Intrinsic::x86_avx_vpermilvar_ps;
1553     else if (VecWidth == 128 && EltWidth == 64)
1554       IID = Intrinsic::x86_avx_vpermilvar_pd;
1555     else if (VecWidth == 256 && EltWidth == 32)
1556       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1557     else if (VecWidth == 256 && EltWidth == 64)
1558       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1559     else if (VecWidth == 512 && EltWidth == 32)
1560       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1561     else if (VecWidth == 512 && EltWidth == 64)
1562       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1563     else
1564       llvm_unreachable("Unexpected intrinsic");
1565   } else if (Name == "cvtpd2dq.256") {
1566     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1567   } else if (Name == "cvtpd2ps.256") {
1568     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1569   } else if (Name == "cvttpd2dq.256") {
1570     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1571   } else if (Name == "cvttps2dq.128") {
1572     IID = Intrinsic::x86_sse2_cvttps2dq;
1573   } else if (Name == "cvttps2dq.256") {
1574     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1575   } else if (Name.startswith("permvar.")) {
1576     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1577     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1578       IID = Intrinsic::x86_avx2_permps;
1579     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1580       IID = Intrinsic::x86_avx2_permd;
1581     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1582       IID = Intrinsic::x86_avx512_permvar_df_256;
1583     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1584       IID = Intrinsic::x86_avx512_permvar_di_256;
1585     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1586       IID = Intrinsic::x86_avx512_permvar_sf_512;
1587     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1588       IID = Intrinsic::x86_avx512_permvar_si_512;
1589     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1590       IID = Intrinsic::x86_avx512_permvar_df_512;
1591     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1592       IID = Intrinsic::x86_avx512_permvar_di_512;
1593     else if (VecWidth == 128 && EltWidth == 16)
1594       IID = Intrinsic::x86_avx512_permvar_hi_128;
1595     else if (VecWidth == 256 && EltWidth == 16)
1596       IID = Intrinsic::x86_avx512_permvar_hi_256;
1597     else if (VecWidth == 512 && EltWidth == 16)
1598       IID = Intrinsic::x86_avx512_permvar_hi_512;
1599     else if (VecWidth == 128 && EltWidth == 8)
1600       IID = Intrinsic::x86_avx512_permvar_qi_128;
1601     else if (VecWidth == 256 && EltWidth == 8)
1602       IID = Intrinsic::x86_avx512_permvar_qi_256;
1603     else if (VecWidth == 512 && EltWidth == 8)
1604       IID = Intrinsic::x86_avx512_permvar_qi_512;
1605     else
1606       llvm_unreachable("Unexpected intrinsic");
1607   } else if (Name.startswith("dbpsadbw.")) {
1608     if (VecWidth == 128)
1609       IID = Intrinsic::x86_avx512_dbpsadbw_128;
1610     else if (VecWidth == 256)
1611       IID = Intrinsic::x86_avx512_dbpsadbw_256;
1612     else if (VecWidth == 512)
1613       IID = Intrinsic::x86_avx512_dbpsadbw_512;
1614     else
1615       llvm_unreachable("Unexpected intrinsic");
1616   } else if (Name.startswith("pmultishift.qb.")) {
1617     if (VecWidth == 128)
1618       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1619     else if (VecWidth == 256)
1620       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1621     else if (VecWidth == 512)
1622       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1623     else
1624       llvm_unreachable("Unexpected intrinsic");
1625   } else if (Name.startswith("conflict.")) {
1626     if (Name[9] == 'd' && VecWidth == 128)
1627       IID = Intrinsic::x86_avx512_conflict_d_128;
1628     else if (Name[9] == 'd' && VecWidth == 256)
1629       IID = Intrinsic::x86_avx512_conflict_d_256;
1630     else if (Name[9] == 'd' && VecWidth == 512)
1631       IID = Intrinsic::x86_avx512_conflict_d_512;
1632     else if (Name[9] == 'q' && VecWidth == 128)
1633       IID = Intrinsic::x86_avx512_conflict_q_128;
1634     else if (Name[9] == 'q' && VecWidth == 256)
1635       IID = Intrinsic::x86_avx512_conflict_q_256;
1636     else if (Name[9] == 'q' && VecWidth == 512)
1637       IID = Intrinsic::x86_avx512_conflict_q_512;
1638     else
1639       llvm_unreachable("Unexpected intrinsic");
1640   } else if (Name.startswith("pavg.")) {
1641     if (Name[5] == 'b' && VecWidth == 128)
1642       IID = Intrinsic::x86_sse2_pavg_b;
1643     else if (Name[5] == 'b' && VecWidth == 256)
1644       IID = Intrinsic::x86_avx2_pavg_b;
1645     else if (Name[5] == 'b' && VecWidth == 512)
1646       IID = Intrinsic::x86_avx512_pavg_b_512;
1647     else if (Name[5] == 'w' && VecWidth == 128)
1648       IID = Intrinsic::x86_sse2_pavg_w;
1649     else if (Name[5] == 'w' && VecWidth == 256)
1650       IID = Intrinsic::x86_avx2_pavg_w;
1651     else if (Name[5] == 'w' && VecWidth == 512)
1652       IID = Intrinsic::x86_avx512_pavg_w_512;
1653     else
1654       llvm_unreachable("Unexpected intrinsic");
1655   } else
1656     return false;
1657 
1658   SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1659                                CI.arg_operands().end());
1660   Args.pop_back();
1661   Args.pop_back();
1662   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1663                            Args);
1664   unsigned NumArgs = CI.getNumArgOperands();
1665   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1666                       CI.getArgOperand(NumArgs - 2));
1667   return true;
1668 }
1669 
1670 /// Upgrade comment in call to inline asm that represents an objc retain release
1671 /// marker.
1672 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1673   size_t Pos;
1674   if (AsmStr->find("mov\tfp") == 0 &&
1675       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1676       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1677     AsmStr->replace(Pos, 1, ";");
1678   }
1679   return;
1680 }
1681 
1682 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1683 /// provided to seamlessly integrate with existing context.
1684 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1685   Function *F = CI->getCalledFunction();
1686   LLVMContext &C = CI->getContext();
1687   IRBuilder<> Builder(C);
1688   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1689 
1690   assert(F && "Intrinsic call is not direct?");
1691 
1692   if (!NewFn) {
1693     // Get the Function's name.
1694     StringRef Name = F->getName();
1695 
1696     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1697     Name = Name.substr(5);
1698 
1699     bool IsX86 = Name.startswith("x86.");
1700     if (IsX86)
1701       Name = Name.substr(4);
1702     bool IsNVVM = Name.startswith("nvvm.");
1703     if (IsNVVM)
1704       Name = Name.substr(5);
1705 
1706     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1707       Module *M = F->getParent();
1708       SmallVector<Metadata *, 1> Elts;
1709       Elts.push_back(
1710           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1711       MDNode *Node = MDNode::get(C, Elts);
1712 
1713       Value *Arg0 = CI->getArgOperand(0);
1714       Value *Arg1 = CI->getArgOperand(1);
1715 
1716       // Nontemporal (unaligned) store of the 0'th element of the float/double
1717       // vector.
1718       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1719       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1720       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1721       Value *Extract =
1722           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1723 
1724       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1725       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1726 
1727       // Remove intrinsic.
1728       CI->eraseFromParent();
1729       return;
1730     }
1731 
1732     if (IsX86 && (Name.startswith("avx.movnt.") ||
1733                   Name.startswith("avx512.storent."))) {
1734       Module *M = F->getParent();
1735       SmallVector<Metadata *, 1> Elts;
1736       Elts.push_back(
1737           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1738       MDNode *Node = MDNode::get(C, Elts);
1739 
1740       Value *Arg0 = CI->getArgOperand(0);
1741       Value *Arg1 = CI->getArgOperand(1);
1742 
1743       // Convert the type of the pointer to a pointer to the stored type.
1744       Value *BC = Builder.CreateBitCast(Arg0,
1745                                         PointerType::getUnqual(Arg1->getType()),
1746                                         "cast");
1747       StoreInst *SI = Builder.CreateAlignedStore(
1748           Arg1, BC,
1749           Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1750       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1751 
1752       // Remove intrinsic.
1753       CI->eraseFromParent();
1754       return;
1755     }
1756 
1757     if (IsX86 && Name == "sse2.storel.dq") {
1758       Value *Arg0 = CI->getArgOperand(0);
1759       Value *Arg1 = CI->getArgOperand(1);
1760 
1761       auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
1762       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1763       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1764       Value *BC = Builder.CreateBitCast(Arg0,
1765                                         PointerType::getUnqual(Elt->getType()),
1766                                         "cast");
1767       Builder.CreateAlignedStore(Elt, BC, Align(1));
1768 
1769       // Remove intrinsic.
1770       CI->eraseFromParent();
1771       return;
1772     }
1773 
1774     if (IsX86 && (Name.startswith("sse.storeu.") ||
1775                   Name.startswith("sse2.storeu.") ||
1776                   Name.startswith("avx.storeu."))) {
1777       Value *Arg0 = CI->getArgOperand(0);
1778       Value *Arg1 = CI->getArgOperand(1);
1779 
1780       Arg0 = Builder.CreateBitCast(Arg0,
1781                                    PointerType::getUnqual(Arg1->getType()),
1782                                    "cast");
1783       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
1784 
1785       // Remove intrinsic.
1786       CI->eraseFromParent();
1787       return;
1788     }
1789 
1790     if (IsX86 && Name == "avx512.mask.store.ss") {
1791       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1792       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1793                          Mask, false);
1794 
1795       // Remove intrinsic.
1796       CI->eraseFromParent();
1797       return;
1798     }
1799 
1800     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1801       // "avx512.mask.storeu." or "avx512.mask.store."
1802       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1803       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1804                          CI->getArgOperand(2), Aligned);
1805 
1806       // Remove intrinsic.
1807       CI->eraseFromParent();
1808       return;
1809     }
1810 
1811     Value *Rep;
1812     // Upgrade packed integer vector compare intrinsics to compare instructions.
1813     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1814                   Name.startswith("avx2.pcmp"))) {
1815       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1816       bool CmpEq = Name[9] == 'e';
1817       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1818                                CI->getArgOperand(0), CI->getArgOperand(1));
1819       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1820     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1821       Type *ExtTy = Type::getInt32Ty(C);
1822       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1823         ExtTy = Type::getInt64Ty(C);
1824       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1825                          ExtTy->getPrimitiveSizeInBits();
1826       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1827       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1828     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1829                          Name == "sse2.sqrt.sd")) {
1830       Value *Vec = CI->getArgOperand(0);
1831       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1832       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1833                                                  Intrinsic::sqrt, Elt0->getType());
1834       Elt0 = Builder.CreateCall(Intr, Elt0);
1835       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1836     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1837                          Name.startswith("sse2.sqrt.p") ||
1838                          Name.startswith("sse.sqrt.p"))) {
1839       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1840                                                          Intrinsic::sqrt,
1841                                                          CI->getType()),
1842                                {CI->getArgOperand(0)});
1843     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1844       if (CI->getNumArgOperands() == 4 &&
1845           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1846            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1847         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1848                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1849 
1850         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1851         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1852                                                            IID), Args);
1853       } else {
1854         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1855                                                            Intrinsic::sqrt,
1856                                                            CI->getType()),
1857                                  {CI->getArgOperand(0)});
1858       }
1859       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1860                           CI->getArgOperand(1));
1861     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1862                          Name.startswith("avx512.ptestnm"))) {
1863       Value *Op0 = CI->getArgOperand(0);
1864       Value *Op1 = CI->getArgOperand(1);
1865       Value *Mask = CI->getArgOperand(2);
1866       Rep = Builder.CreateAnd(Op0, Op1);
1867       llvm::Type *Ty = Op0->getType();
1868       Value *Zero = llvm::Constant::getNullValue(Ty);
1869       ICmpInst::Predicate Pred =
1870         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1871       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1872       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1873     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1874       unsigned NumElts =
1875           cast<VectorType>(CI->getArgOperand(1)->getType())->getNumElements();
1876       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1877       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1878                           CI->getArgOperand(1));
1879     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1880       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1881       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1882       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1883       int Indices[64];
1884       for (unsigned i = 0; i != NumElts; ++i)
1885         Indices[i] = i;
1886 
1887       // First extract half of each vector. This gives better codegen than
1888       // doing it in a single shuffle.
1889       LHS = Builder.CreateShuffleVector(LHS, LHS,
1890                                         makeArrayRef(Indices, NumElts / 2));
1891       RHS = Builder.CreateShuffleVector(RHS, RHS,
1892                                         makeArrayRef(Indices, NumElts / 2));
1893       // Concat the vectors.
1894       // NOTE: Operands have to be swapped to match intrinsic definition.
1895       Rep = Builder.CreateShuffleVector(RHS, LHS,
1896                                         makeArrayRef(Indices, NumElts));
1897       Rep = Builder.CreateBitCast(Rep, CI->getType());
1898     } else if (IsX86 && Name == "avx512.kand.w") {
1899       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1900       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1901       Rep = Builder.CreateAnd(LHS, RHS);
1902       Rep = Builder.CreateBitCast(Rep, CI->getType());
1903     } else if (IsX86 && Name == "avx512.kandn.w") {
1904       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1905       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1906       LHS = Builder.CreateNot(LHS);
1907       Rep = Builder.CreateAnd(LHS, RHS);
1908       Rep = Builder.CreateBitCast(Rep, CI->getType());
1909     } else if (IsX86 && Name == "avx512.kor.w") {
1910       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1911       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1912       Rep = Builder.CreateOr(LHS, RHS);
1913       Rep = Builder.CreateBitCast(Rep, CI->getType());
1914     } else if (IsX86 && Name == "avx512.kxor.w") {
1915       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1916       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1917       Rep = Builder.CreateXor(LHS, RHS);
1918       Rep = Builder.CreateBitCast(Rep, CI->getType());
1919     } else if (IsX86 && Name == "avx512.kxnor.w") {
1920       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1921       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1922       LHS = Builder.CreateNot(LHS);
1923       Rep = Builder.CreateXor(LHS, RHS);
1924       Rep = Builder.CreateBitCast(Rep, CI->getType());
1925     } else if (IsX86 && Name == "avx512.knot.w") {
1926       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1927       Rep = Builder.CreateNot(Rep);
1928       Rep = Builder.CreateBitCast(Rep, CI->getType());
1929     } else if (IsX86 &&
1930                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1931       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1932       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1933       Rep = Builder.CreateOr(LHS, RHS);
1934       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1935       Value *C;
1936       if (Name[14] == 'c')
1937         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1938       else
1939         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1940       Rep = Builder.CreateICmpEQ(Rep, C);
1941       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1942     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1943                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1944                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1945                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1946       Type *I32Ty = Type::getInt32Ty(C);
1947       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1948                                                  ConstantInt::get(I32Ty, 0));
1949       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1950                                                  ConstantInt::get(I32Ty, 0));
1951       Value *EltOp;
1952       if (Name.contains(".add."))
1953         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1954       else if (Name.contains(".sub."))
1955         EltOp = Builder.CreateFSub(Elt0, Elt1);
1956       else if (Name.contains(".mul."))
1957         EltOp = Builder.CreateFMul(Elt0, Elt1);
1958       else
1959         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1960       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1961                                         ConstantInt::get(I32Ty, 0));
1962     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1963       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1964       bool CmpEq = Name[16] == 'e';
1965       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1966     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1967       Type *OpTy = CI->getArgOperand(0)->getType();
1968       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1969       Intrinsic::ID IID;
1970       switch (VecWidth) {
1971       default: llvm_unreachable("Unexpected intrinsic");
1972       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1973       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1974       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1975       }
1976 
1977       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1978                                { CI->getOperand(0), CI->getArgOperand(1) });
1979       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1980     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1981       Type *OpTy = CI->getArgOperand(0)->getType();
1982       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1983       unsigned EltWidth = OpTy->getScalarSizeInBits();
1984       Intrinsic::ID IID;
1985       if (VecWidth == 128 && EltWidth == 32)
1986         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1987       else if (VecWidth == 256 && EltWidth == 32)
1988         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1989       else if (VecWidth == 512 && EltWidth == 32)
1990         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1991       else if (VecWidth == 128 && EltWidth == 64)
1992         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1993       else if (VecWidth == 256 && EltWidth == 64)
1994         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1995       else if (VecWidth == 512 && EltWidth == 64)
1996         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1997       else
1998         llvm_unreachable("Unexpected intrinsic");
1999 
2000       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2001                                { CI->getOperand(0), CI->getArgOperand(1) });
2002       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2003     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
2004       Type *OpTy = CI->getArgOperand(0)->getType();
2005       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2006       unsigned EltWidth = OpTy->getScalarSizeInBits();
2007       Intrinsic::ID IID;
2008       if (VecWidth == 128 && EltWidth == 32)
2009         IID = Intrinsic::x86_avx512_cmp_ps_128;
2010       else if (VecWidth == 256 && EltWidth == 32)
2011         IID = Intrinsic::x86_avx512_cmp_ps_256;
2012       else if (VecWidth == 512 && EltWidth == 32)
2013         IID = Intrinsic::x86_avx512_cmp_ps_512;
2014       else if (VecWidth == 128 && EltWidth == 64)
2015         IID = Intrinsic::x86_avx512_cmp_pd_128;
2016       else if (VecWidth == 256 && EltWidth == 64)
2017         IID = Intrinsic::x86_avx512_cmp_pd_256;
2018       else if (VecWidth == 512 && EltWidth == 64)
2019         IID = Intrinsic::x86_avx512_cmp_pd_512;
2020       else
2021         llvm_unreachable("Unexpected intrinsic");
2022 
2023       SmallVector<Value *, 4> Args;
2024       Args.push_back(CI->getArgOperand(0));
2025       Args.push_back(CI->getArgOperand(1));
2026       Args.push_back(CI->getArgOperand(2));
2027       if (CI->getNumArgOperands() == 5)
2028         Args.push_back(CI->getArgOperand(4));
2029 
2030       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2031                                Args);
2032       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
2033     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
2034                Name[16] != 'p') {
2035       // Integer compare intrinsics.
2036       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2037       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2038     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2039       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2040       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2041     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2042                          Name.startswith("avx512.cvtw2mask.") ||
2043                          Name.startswith("avx512.cvtd2mask.") ||
2044                          Name.startswith("avx512.cvtq2mask."))) {
2045       Value *Op = CI->getArgOperand(0);
2046       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2047       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2048       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2049     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2050                         Name == "ssse3.pabs.w.128" ||
2051                         Name == "ssse3.pabs.d.128" ||
2052                         Name.startswith("avx2.pabs") ||
2053                         Name.startswith("avx512.mask.pabs"))) {
2054       Rep = upgradeAbs(Builder, *CI);
2055     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2056                          Name == "sse2.pmaxs.w" ||
2057                          Name == "sse41.pmaxsd" ||
2058                          Name.startswith("avx2.pmaxs") ||
2059                          Name.startswith("avx512.mask.pmaxs"))) {
2060       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
2061     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2062                          Name == "sse41.pmaxuw" ||
2063                          Name == "sse41.pmaxud" ||
2064                          Name.startswith("avx2.pmaxu") ||
2065                          Name.startswith("avx512.mask.pmaxu"))) {
2066       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
2067     } else if (IsX86 && (Name == "sse41.pminsb" ||
2068                          Name == "sse2.pmins.w" ||
2069                          Name == "sse41.pminsd" ||
2070                          Name.startswith("avx2.pmins") ||
2071                          Name.startswith("avx512.mask.pmins"))) {
2072       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
2073     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2074                          Name == "sse41.pminuw" ||
2075                          Name == "sse41.pminud" ||
2076                          Name.startswith("avx2.pminu") ||
2077                          Name.startswith("avx512.mask.pminu"))) {
2078       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
2079     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2080                          Name == "avx2.pmulu.dq" ||
2081                          Name == "avx512.pmulu.dq.512" ||
2082                          Name.startswith("avx512.mask.pmulu.dq."))) {
2083       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2084     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2085                          Name == "avx2.pmul.dq" ||
2086                          Name == "avx512.pmul.dq.512" ||
2087                          Name.startswith("avx512.mask.pmul.dq."))) {
2088       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2089     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2090                          Name == "sse2.cvtsi2sd" ||
2091                          Name == "sse.cvtsi642ss" ||
2092                          Name == "sse2.cvtsi642sd")) {
2093       Rep = Builder.CreateSIToFP(
2094           CI->getArgOperand(1),
2095           cast<VectorType>(CI->getType())->getElementType());
2096       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2097     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2098       Rep = Builder.CreateUIToFP(
2099           CI->getArgOperand(1),
2100           cast<VectorType>(CI->getType())->getElementType());
2101       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2102     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2103       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2104       Rep = Builder.CreateFPExt(
2105           Rep, cast<VectorType>(CI->getType())->getElementType());
2106       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2107     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2108                          Name == "sse2.cvtdq2ps" ||
2109                          Name == "avx.cvtdq2.pd.256" ||
2110                          Name == "avx.cvtdq2.ps.256" ||
2111                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2112                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2113                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2114                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2115                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2116                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2117                          Name == "avx512.mask.cvtqq2ps.256" ||
2118                          Name == "avx512.mask.cvtqq2ps.512" ||
2119                          Name == "avx512.mask.cvtuqq2ps.256" ||
2120                          Name == "avx512.mask.cvtuqq2ps.512" ||
2121                          Name == "sse2.cvtps2pd" ||
2122                          Name == "avx.cvt.ps2.pd.256" ||
2123                          Name == "avx512.mask.cvtps2pd.128" ||
2124                          Name == "avx512.mask.cvtps2pd.256")) {
2125       auto *DstTy = cast<VectorType>(CI->getType());
2126       Rep = CI->getArgOperand(0);
2127       auto *SrcTy = cast<VectorType>(Rep->getType());
2128 
2129       unsigned NumDstElts = DstTy->getNumElements();
2130       if (NumDstElts < SrcTy->getNumElements()) {
2131         assert(NumDstElts == 2 && "Unexpected vector size");
2132         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2133       }
2134 
2135       bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2136       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2137       if (IsPS2PD)
2138         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2139       else if (CI->getNumArgOperands() == 4 &&
2140                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2141                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2142         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2143                                        : Intrinsic::x86_avx512_sitofp_round;
2144         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2145                                                 { DstTy, SrcTy });
2146         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2147       } else {
2148         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2149                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2150       }
2151 
2152       if (CI->getNumArgOperands() >= 3)
2153         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2154                             CI->getArgOperand(1));
2155     } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2156                          Name.startswith("vcvtph2ps."))) {
2157       auto *DstTy = cast<VectorType>(CI->getType());
2158       Rep = CI->getArgOperand(0);
2159       auto *SrcTy = cast<VectorType>(Rep->getType());
2160       unsigned NumDstElts = DstTy->getNumElements();
2161       if (NumDstElts != SrcTy->getNumElements()) {
2162         assert(NumDstElts == 4 && "Unexpected vector size");
2163         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2164       }
2165       Rep = Builder.CreateBitCast(
2166           Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2167       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2168       if (CI->getNumArgOperands() >= 3)
2169         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2170                             CI->getArgOperand(1));
2171     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2172       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2173                               CI->getArgOperand(1), CI->getArgOperand(2),
2174                               /*Aligned*/false);
2175     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2176       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2177                               CI->getArgOperand(1),CI->getArgOperand(2),
2178                               /*Aligned*/true);
2179     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2180       auto *ResultTy = cast<VectorType>(CI->getType());
2181       Type *PtrTy = ResultTy->getElementType();
2182 
2183       // Cast the pointer to element type.
2184       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2185                                          llvm::PointerType::getUnqual(PtrTy));
2186 
2187       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2188                                      ResultTy->getNumElements());
2189 
2190       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2191                                                 Intrinsic::masked_expandload,
2192                                                 ResultTy);
2193       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2194     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2195       auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2196       Type *PtrTy = ResultTy->getElementType();
2197 
2198       // Cast the pointer to element type.
2199       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2200                                          llvm::PointerType::getUnqual(PtrTy));
2201 
2202       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2203                                      ResultTy->getNumElements());
2204 
2205       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2206                                                 Intrinsic::masked_compressstore,
2207                                                 ResultTy);
2208       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2209     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2210                          Name.startswith("avx512.mask.expand."))) {
2211       auto *ResultTy = cast<VectorType>(CI->getType());
2212 
2213       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2214                                      ResultTy->getNumElements());
2215 
2216       bool IsCompress = Name[12] == 'c';
2217       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2218                                      : Intrinsic::x86_avx512_mask_expand;
2219       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2220       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2221                                        MaskVec });
2222     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2223       bool IsSigned;
2224       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2225           Name.endswith("uq"))
2226         IsSigned = false;
2227       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2228                Name.endswith("q"))
2229         IsSigned = true;
2230       else
2231         llvm_unreachable("Unknown suffix");
2232 
2233       unsigned Imm;
2234       if (CI->getNumArgOperands() == 3) {
2235         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2236       } else {
2237         Name = Name.substr(9); // strip off "xop.vpcom"
2238         if (Name.startswith("lt"))
2239           Imm = 0;
2240         else if (Name.startswith("le"))
2241           Imm = 1;
2242         else if (Name.startswith("gt"))
2243           Imm = 2;
2244         else if (Name.startswith("ge"))
2245           Imm = 3;
2246         else if (Name.startswith("eq"))
2247           Imm = 4;
2248         else if (Name.startswith("ne"))
2249           Imm = 5;
2250         else if (Name.startswith("false"))
2251           Imm = 6;
2252         else if (Name.startswith("true"))
2253           Imm = 7;
2254         else
2255           llvm_unreachable("Unknown condition");
2256       }
2257 
2258       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2259     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2260       Value *Sel = CI->getArgOperand(2);
2261       Value *NotSel = Builder.CreateNot(Sel);
2262       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2263       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2264       Rep = Builder.CreateOr(Sel0, Sel1);
2265     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2266                          Name.startswith("avx512.prol") ||
2267                          Name.startswith("avx512.mask.prol"))) {
2268       Rep = upgradeX86Rotate(Builder, *CI, false);
2269     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2270                          Name.startswith("avx512.mask.pror"))) {
2271       Rep = upgradeX86Rotate(Builder, *CI, true);
2272     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2273                          Name.startswith("avx512.mask.vpshld") ||
2274                          Name.startswith("avx512.maskz.vpshld"))) {
2275       bool ZeroMask = Name[11] == 'z';
2276       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2277     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2278                          Name.startswith("avx512.mask.vpshrd") ||
2279                          Name.startswith("avx512.maskz.vpshrd"))) {
2280       bool ZeroMask = Name[11] == 'z';
2281       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2282     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2283       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2284                                                Intrinsic::x86_sse42_crc32_32_8);
2285       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2286       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2287       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2288     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2289                          Name.startswith("avx512.vbroadcast.s"))) {
2290       // Replace broadcasts with a series of insertelements.
2291       auto *VecTy = cast<VectorType>(CI->getType());
2292       Type *EltTy = VecTy->getElementType();
2293       unsigned EltNum = VecTy->getNumElements();
2294       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2295                                           EltTy->getPointerTo());
2296       Value *Load = Builder.CreateLoad(EltTy, Cast);
2297       Type *I32Ty = Type::getInt32Ty(C);
2298       Rep = UndefValue::get(VecTy);
2299       for (unsigned I = 0; I < EltNum; ++I)
2300         Rep = Builder.CreateInsertElement(Rep, Load,
2301                                           ConstantInt::get(I32Ty, I));
2302     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2303                          Name.startswith("sse41.pmovzx") ||
2304                          Name.startswith("avx2.pmovsx") ||
2305                          Name.startswith("avx2.pmovzx") ||
2306                          Name.startswith("avx512.mask.pmovsx") ||
2307                          Name.startswith("avx512.mask.pmovzx"))) {
2308       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2309       VectorType *DstTy = cast<VectorType>(CI->getType());
2310       unsigned NumDstElts = DstTy->getNumElements();
2311 
2312       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2313       SmallVector<int, 8> ShuffleMask(NumDstElts);
2314       for (unsigned i = 0; i != NumDstElts; ++i)
2315         ShuffleMask[i] = i;
2316 
2317       Value *SV = Builder.CreateShuffleVector(
2318           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2319 
2320       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2321       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2322                    : Builder.CreateZExt(SV, DstTy);
2323       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2324       if (CI->getNumArgOperands() == 3)
2325         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2326                             CI->getArgOperand(1));
2327     } else if (Name == "avx512.mask.pmov.qd.256" ||
2328                Name == "avx512.mask.pmov.qd.512" ||
2329                Name == "avx512.mask.pmov.wb.256" ||
2330                Name == "avx512.mask.pmov.wb.512") {
2331       Type *Ty = CI->getArgOperand(1)->getType();
2332       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2333       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2334                           CI->getArgOperand(1));
2335     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2336                          Name == "avx2.vbroadcasti128")) {
2337       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2338       Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2339       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2340       auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2341       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2342                                             PointerType::getUnqual(VT));
2343       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2344       if (NumSrcElts == 2)
2345         Rep = Builder.CreateShuffleVector(
2346             Load, UndefValue::get(Load->getType()), ArrayRef<int>{0, 1, 0, 1});
2347       else
2348         Rep =
2349             Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2350                                         ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2351     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2352                          Name.startswith("avx512.mask.shuf.f"))) {
2353       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2354       Type *VT = CI->getType();
2355       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2356       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2357       unsigned ControlBitsMask = NumLanes - 1;
2358       unsigned NumControlBits = NumLanes / 2;
2359       SmallVector<int, 8> ShuffleMask(0);
2360 
2361       for (unsigned l = 0; l != NumLanes; ++l) {
2362         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2363         // We actually need the other source.
2364         if (l >= NumLanes / 2)
2365           LaneMask += NumLanes;
2366         for (unsigned i = 0; i != NumElementsInLane; ++i)
2367           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2368       }
2369       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2370                                         CI->getArgOperand(1), ShuffleMask);
2371       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2372                           CI->getArgOperand(3));
2373     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2374                          Name.startswith("avx512.mask.broadcasti"))) {
2375       unsigned NumSrcElts =
2376           cast<VectorType>(CI->getArgOperand(0)->getType())->getNumElements();
2377       unsigned NumDstElts = cast<VectorType>(CI->getType())->getNumElements();
2378 
2379       SmallVector<int, 8> ShuffleMask(NumDstElts);
2380       for (unsigned i = 0; i != NumDstElts; ++i)
2381         ShuffleMask[i] = i % NumSrcElts;
2382 
2383       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2384                                         CI->getArgOperand(0),
2385                                         ShuffleMask);
2386       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2387                           CI->getArgOperand(1));
2388     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2389                          Name.startswith("avx2.vbroadcast") ||
2390                          Name.startswith("avx512.pbroadcast") ||
2391                          Name.startswith("avx512.mask.broadcast.s"))) {
2392       // Replace vp?broadcasts with a vector shuffle.
2393       Value *Op = CI->getArgOperand(0);
2394       ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2395       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2396       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2397                                         Constant::getNullValue(MaskTy));
2398 
2399       if (CI->getNumArgOperands() == 3)
2400         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2401                             CI->getArgOperand(1));
2402     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2403                          Name.startswith("sse2.psubs.") ||
2404                          Name.startswith("avx2.padds.") ||
2405                          Name.startswith("avx2.psubs.") ||
2406                          Name.startswith("avx512.padds.") ||
2407                          Name.startswith("avx512.psubs.") ||
2408                          Name.startswith("avx512.mask.padds.") ||
2409                          Name.startswith("avx512.mask.psubs."))) {
2410       bool IsAdd = Name.contains(".padds");
2411       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2412     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2413                          Name.startswith("sse2.psubus.") ||
2414                          Name.startswith("avx2.paddus.") ||
2415                          Name.startswith("avx2.psubus.") ||
2416                          Name.startswith("avx512.mask.paddus.") ||
2417                          Name.startswith("avx512.mask.psubus."))) {
2418       bool IsAdd = Name.contains(".paddus");
2419       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2420     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2421       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2422                                       CI->getArgOperand(1),
2423                                       CI->getArgOperand(2),
2424                                       CI->getArgOperand(3),
2425                                       CI->getArgOperand(4),
2426                                       false);
2427     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2428       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2429                                       CI->getArgOperand(1),
2430                                       CI->getArgOperand(2),
2431                                       CI->getArgOperand(3),
2432                                       CI->getArgOperand(4),
2433                                       true);
2434     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2435                          Name == "avx2.psll.dq")) {
2436       // 128/256-bit shift left specified in bits.
2437       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2438       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2439                                        Shift / 8); // Shift is in bits.
2440     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2441                          Name == "avx2.psrl.dq")) {
2442       // 128/256-bit shift right specified in bits.
2443       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2444       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2445                                        Shift / 8); // Shift is in bits.
2446     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2447                          Name == "avx2.psll.dq.bs" ||
2448                          Name == "avx512.psll.dq.512")) {
2449       // 128/256/512-bit shift left specified in bytes.
2450       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2451       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2452     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2453                          Name == "avx2.psrl.dq.bs" ||
2454                          Name == "avx512.psrl.dq.512")) {
2455       // 128/256/512-bit shift right specified in bytes.
2456       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2457       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2458     } else if (IsX86 && (Name == "sse41.pblendw" ||
2459                          Name.startswith("sse41.blendp") ||
2460                          Name.startswith("avx.blend.p") ||
2461                          Name == "avx2.pblendw" ||
2462                          Name.startswith("avx2.pblendd."))) {
2463       Value *Op0 = CI->getArgOperand(0);
2464       Value *Op1 = CI->getArgOperand(1);
2465       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2466       VectorType *VecTy = cast<VectorType>(CI->getType());
2467       unsigned NumElts = VecTy->getNumElements();
2468 
2469       SmallVector<int, 16> Idxs(NumElts);
2470       for (unsigned i = 0; i != NumElts; ++i)
2471         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2472 
2473       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2474     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2475                          Name == "avx2.vinserti128" ||
2476                          Name.startswith("avx512.mask.insert"))) {
2477       Value *Op0 = CI->getArgOperand(0);
2478       Value *Op1 = CI->getArgOperand(1);
2479       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2480       unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
2481       unsigned SrcNumElts = cast<VectorType>(Op1->getType())->getNumElements();
2482       unsigned Scale = DstNumElts / SrcNumElts;
2483 
2484       // Mask off the high bits of the immediate value; hardware ignores those.
2485       Imm = Imm % Scale;
2486 
2487       // Extend the second operand into a vector the size of the destination.
2488       Value *UndefV = UndefValue::get(Op1->getType());
2489       SmallVector<int, 8> Idxs(DstNumElts);
2490       for (unsigned i = 0; i != SrcNumElts; ++i)
2491         Idxs[i] = i;
2492       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2493         Idxs[i] = SrcNumElts;
2494       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2495 
2496       // Insert the second operand into the first operand.
2497 
2498       // Note that there is no guarantee that instruction lowering will actually
2499       // produce a vinsertf128 instruction for the created shuffles. In
2500       // particular, the 0 immediate case involves no lane changes, so it can
2501       // be handled as a blend.
2502 
2503       // Example of shuffle mask for 32-bit elements:
2504       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2505       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2506 
2507       // First fill with identity mask.
2508       for (unsigned i = 0; i != DstNumElts; ++i)
2509         Idxs[i] = i;
2510       // Then replace the elements where we need to insert.
2511       for (unsigned i = 0; i != SrcNumElts; ++i)
2512         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2513       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2514 
2515       // If the intrinsic has a mask operand, handle that.
2516       if (CI->getNumArgOperands() == 5)
2517         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2518                             CI->getArgOperand(3));
2519     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2520                          Name == "avx2.vextracti128" ||
2521                          Name.startswith("avx512.mask.vextract"))) {
2522       Value *Op0 = CI->getArgOperand(0);
2523       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2524       unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
2525       unsigned SrcNumElts = cast<VectorType>(Op0->getType())->getNumElements();
2526       unsigned Scale = SrcNumElts / DstNumElts;
2527 
2528       // Mask off the high bits of the immediate value; hardware ignores those.
2529       Imm = Imm % Scale;
2530 
2531       // Get indexes for the subvector of the input vector.
2532       SmallVector<int, 8> Idxs(DstNumElts);
2533       for (unsigned i = 0; i != DstNumElts; ++i) {
2534         Idxs[i] = i + (Imm * DstNumElts);
2535       }
2536       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2537 
2538       // If the intrinsic has a mask operand, handle that.
2539       if (CI->getNumArgOperands() == 4)
2540         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2541                             CI->getArgOperand(2));
2542     } else if (!IsX86 && Name == "stackprotectorcheck") {
2543       Rep = nullptr;
2544     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2545                          Name.startswith("avx512.mask.perm.di."))) {
2546       Value *Op0 = CI->getArgOperand(0);
2547       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2548       VectorType *VecTy = cast<VectorType>(CI->getType());
2549       unsigned NumElts = VecTy->getNumElements();
2550 
2551       SmallVector<int, 8> Idxs(NumElts);
2552       for (unsigned i = 0; i != NumElts; ++i)
2553         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2554 
2555       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2556 
2557       if (CI->getNumArgOperands() == 4)
2558         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2559                             CI->getArgOperand(2));
2560     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2561                          Name == "avx2.vperm2i128")) {
2562       // The immediate permute control byte looks like this:
2563       //    [1:0] - select 128 bits from sources for low half of destination
2564       //    [2]   - ignore
2565       //    [3]   - zero low half of destination
2566       //    [5:4] - select 128 bits from sources for high half of destination
2567       //    [6]   - ignore
2568       //    [7]   - zero high half of destination
2569 
2570       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2571 
2572       unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2573       unsigned HalfSize = NumElts / 2;
2574       SmallVector<int, 8> ShuffleMask(NumElts);
2575 
2576       // Determine which operand(s) are actually in use for this instruction.
2577       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2578       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2579 
2580       // If needed, replace operands based on zero mask.
2581       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2582       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2583 
2584       // Permute low half of result.
2585       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2586       for (unsigned i = 0; i < HalfSize; ++i)
2587         ShuffleMask[i] = StartIndex + i;
2588 
2589       // Permute high half of result.
2590       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2591       for (unsigned i = 0; i < HalfSize; ++i)
2592         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2593 
2594       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2595 
2596     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2597                          Name == "sse2.pshuf.d" ||
2598                          Name.startswith("avx512.mask.vpermil.p") ||
2599                          Name.startswith("avx512.mask.pshuf.d."))) {
2600       Value *Op0 = CI->getArgOperand(0);
2601       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2602       VectorType *VecTy = cast<VectorType>(CI->getType());
2603       unsigned NumElts = VecTy->getNumElements();
2604       // Calculate the size of each index in the immediate.
2605       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2606       unsigned IdxMask = ((1 << IdxSize) - 1);
2607 
2608       SmallVector<int, 8> Idxs(NumElts);
2609       // Lookup the bits for this element, wrapping around the immediate every
2610       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2611       // to offset by the first index of each group.
2612       for (unsigned i = 0; i != NumElts; ++i)
2613         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2614 
2615       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2616 
2617       if (CI->getNumArgOperands() == 4)
2618         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2619                             CI->getArgOperand(2));
2620     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2621                          Name.startswith("avx512.mask.pshufl.w."))) {
2622       Value *Op0 = CI->getArgOperand(0);
2623       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2624       unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2625 
2626       SmallVector<int, 16> Idxs(NumElts);
2627       for (unsigned l = 0; l != NumElts; l += 8) {
2628         for (unsigned i = 0; i != 4; ++i)
2629           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2630         for (unsigned i = 4; i != 8; ++i)
2631           Idxs[i + l] = i + l;
2632       }
2633 
2634       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2635 
2636       if (CI->getNumArgOperands() == 4)
2637         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2638                             CI->getArgOperand(2));
2639     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2640                          Name.startswith("avx512.mask.pshufh.w."))) {
2641       Value *Op0 = CI->getArgOperand(0);
2642       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2643       unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2644 
2645       SmallVector<int, 16> Idxs(NumElts);
2646       for (unsigned l = 0; l != NumElts; l += 8) {
2647         for (unsigned i = 0; i != 4; ++i)
2648           Idxs[i + l] = i + l;
2649         for (unsigned i = 0; i != 4; ++i)
2650           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2651       }
2652 
2653       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2654 
2655       if (CI->getNumArgOperands() == 4)
2656         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2657                             CI->getArgOperand(2));
2658     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2659       Value *Op0 = CI->getArgOperand(0);
2660       Value *Op1 = CI->getArgOperand(1);
2661       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2662       unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2663 
2664       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2665       unsigned HalfLaneElts = NumLaneElts / 2;
2666 
2667       SmallVector<int, 16> Idxs(NumElts);
2668       for (unsigned i = 0; i != NumElts; ++i) {
2669         // Base index is the starting element of the lane.
2670         Idxs[i] = i - (i % NumLaneElts);
2671         // If we are half way through the lane switch to the other source.
2672         if ((i % NumLaneElts) >= HalfLaneElts)
2673           Idxs[i] += NumElts;
2674         // Now select the specific element. By adding HalfLaneElts bits from
2675         // the immediate. Wrapping around the immediate every 8-bits.
2676         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2677       }
2678 
2679       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2680 
2681       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2682                           CI->getArgOperand(3));
2683     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2684                          Name.startswith("avx512.mask.movshdup") ||
2685                          Name.startswith("avx512.mask.movsldup"))) {
2686       Value *Op0 = CI->getArgOperand(0);
2687       unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
2688       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2689 
2690       unsigned Offset = 0;
2691       if (Name.startswith("avx512.mask.movshdup."))
2692         Offset = 1;
2693 
2694       SmallVector<int, 16> Idxs(NumElts);
2695       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2696         for (unsigned i = 0; i != NumLaneElts; i += 2) {
2697           Idxs[i + l + 0] = i + l + Offset;
2698           Idxs[i + l + 1] = i + l + Offset;
2699         }
2700 
2701       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2702 
2703       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2704                           CI->getArgOperand(1));
2705     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2706                          Name.startswith("avx512.mask.unpckl."))) {
2707       Value *Op0 = CI->getArgOperand(0);
2708       Value *Op1 = CI->getArgOperand(1);
2709       int NumElts = cast<VectorType>(CI->getType())->getNumElements();
2710       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2711 
2712       SmallVector<int, 64> Idxs(NumElts);
2713       for (int l = 0; l != NumElts; l += NumLaneElts)
2714         for (int i = 0; i != NumLaneElts; ++i)
2715           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2716 
2717       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2718 
2719       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2720                           CI->getArgOperand(2));
2721     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2722                          Name.startswith("avx512.mask.unpckh."))) {
2723       Value *Op0 = CI->getArgOperand(0);
2724       Value *Op1 = CI->getArgOperand(1);
2725       int NumElts = cast<VectorType>(CI->getType())->getNumElements();
2726       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2727 
2728       SmallVector<int, 64> Idxs(NumElts);
2729       for (int l = 0; l != NumElts; l += NumLaneElts)
2730         for (int i = 0; i != NumLaneElts; ++i)
2731           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2732 
2733       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2734 
2735       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2736                           CI->getArgOperand(2));
2737     } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2738                          Name.startswith("avx512.mask.pand."))) {
2739       VectorType *FTy = cast<VectorType>(CI->getType());
2740       VectorType *ITy = VectorType::getInteger(FTy);
2741       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2742                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2743       Rep = Builder.CreateBitCast(Rep, FTy);
2744       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2745                           CI->getArgOperand(2));
2746     } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2747                          Name.startswith("avx512.mask.pandn."))) {
2748       VectorType *FTy = cast<VectorType>(CI->getType());
2749       VectorType *ITy = VectorType::getInteger(FTy);
2750       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2751       Rep = Builder.CreateAnd(Rep,
2752                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2753       Rep = Builder.CreateBitCast(Rep, FTy);
2754       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2755                           CI->getArgOperand(2));
2756     } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2757                          Name.startswith("avx512.mask.por."))) {
2758       VectorType *FTy = cast<VectorType>(CI->getType());
2759       VectorType *ITy = VectorType::getInteger(FTy);
2760       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2761                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2762       Rep = Builder.CreateBitCast(Rep, FTy);
2763       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2764                           CI->getArgOperand(2));
2765     } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2766                          Name.startswith("avx512.mask.pxor."))) {
2767       VectorType *FTy = cast<VectorType>(CI->getType());
2768       VectorType *ITy = VectorType::getInteger(FTy);
2769       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2770                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2771       Rep = Builder.CreateBitCast(Rep, FTy);
2772       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2773                           CI->getArgOperand(2));
2774     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2775       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2776       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2777                           CI->getArgOperand(2));
2778     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2779       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2780       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2781                           CI->getArgOperand(2));
2782     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2783       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2784       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2785                           CI->getArgOperand(2));
2786     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2787       if (Name.endswith(".512")) {
2788         Intrinsic::ID IID;
2789         if (Name[17] == 's')
2790           IID = Intrinsic::x86_avx512_add_ps_512;
2791         else
2792           IID = Intrinsic::x86_avx512_add_pd_512;
2793 
2794         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2795                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2796                                    CI->getArgOperand(4) });
2797       } else {
2798         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2799       }
2800       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2801                           CI->getArgOperand(2));
2802     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2803       if (Name.endswith(".512")) {
2804         Intrinsic::ID IID;
2805         if (Name[17] == 's')
2806           IID = Intrinsic::x86_avx512_div_ps_512;
2807         else
2808           IID = Intrinsic::x86_avx512_div_pd_512;
2809 
2810         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2811                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2812                                    CI->getArgOperand(4) });
2813       } else {
2814         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2815       }
2816       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2817                           CI->getArgOperand(2));
2818     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2819       if (Name.endswith(".512")) {
2820         Intrinsic::ID IID;
2821         if (Name[17] == 's')
2822           IID = Intrinsic::x86_avx512_mul_ps_512;
2823         else
2824           IID = Intrinsic::x86_avx512_mul_pd_512;
2825 
2826         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2827                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2828                                    CI->getArgOperand(4) });
2829       } else {
2830         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2831       }
2832       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2833                           CI->getArgOperand(2));
2834     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2835       if (Name.endswith(".512")) {
2836         Intrinsic::ID IID;
2837         if (Name[17] == 's')
2838           IID = Intrinsic::x86_avx512_sub_ps_512;
2839         else
2840           IID = Intrinsic::x86_avx512_sub_pd_512;
2841 
2842         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2843                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2844                                    CI->getArgOperand(4) });
2845       } else {
2846         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2847       }
2848       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2849                           CI->getArgOperand(2));
2850     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2851                          Name.startswith("avx512.mask.min.p")) &&
2852                Name.drop_front(18) == ".512") {
2853       bool IsDouble = Name[17] == 'd';
2854       bool IsMin = Name[13] == 'i';
2855       static const Intrinsic::ID MinMaxTbl[2][2] = {
2856         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2857         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2858       };
2859       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2860 
2861       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2862                                { CI->getArgOperand(0), CI->getArgOperand(1),
2863                                  CI->getArgOperand(4) });
2864       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2865                           CI->getArgOperand(2));
2866     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2867       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2868                                                          Intrinsic::ctlz,
2869                                                          CI->getType()),
2870                                { CI->getArgOperand(0), Builder.getInt1(false) });
2871       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2872                           CI->getArgOperand(1));
2873     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2874       bool IsImmediate = Name[16] == 'i' ||
2875                          (Name.size() > 18 && Name[18] == 'i');
2876       bool IsVariable = Name[16] == 'v';
2877       char Size = Name[16] == '.' ? Name[17] :
2878                   Name[17] == '.' ? Name[18] :
2879                   Name[18] == '.' ? Name[19] :
2880                                     Name[20];
2881 
2882       Intrinsic::ID IID;
2883       if (IsVariable && Name[17] != '.') {
2884         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2885           IID = Intrinsic::x86_avx2_psllv_q;
2886         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2887           IID = Intrinsic::x86_avx2_psllv_q_256;
2888         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2889           IID = Intrinsic::x86_avx2_psllv_d;
2890         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2891           IID = Intrinsic::x86_avx2_psllv_d_256;
2892         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2893           IID = Intrinsic::x86_avx512_psllv_w_128;
2894         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2895           IID = Intrinsic::x86_avx512_psllv_w_256;
2896         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2897           IID = Intrinsic::x86_avx512_psllv_w_512;
2898         else
2899           llvm_unreachable("Unexpected size");
2900       } else if (Name.endswith(".128")) {
2901         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2902           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2903                             : Intrinsic::x86_sse2_psll_d;
2904         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2905           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2906                             : Intrinsic::x86_sse2_psll_q;
2907         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2908           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2909                             : Intrinsic::x86_sse2_psll_w;
2910         else
2911           llvm_unreachable("Unexpected size");
2912       } else if (Name.endswith(".256")) {
2913         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2914           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2915                             : Intrinsic::x86_avx2_psll_d;
2916         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2917           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2918                             : Intrinsic::x86_avx2_psll_q;
2919         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2920           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2921                             : Intrinsic::x86_avx2_psll_w;
2922         else
2923           llvm_unreachable("Unexpected size");
2924       } else {
2925         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2926           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2927                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
2928                               Intrinsic::x86_avx512_psll_d_512;
2929         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2930           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2931                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
2932                               Intrinsic::x86_avx512_psll_q_512;
2933         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2934           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2935                             : Intrinsic::x86_avx512_psll_w_512;
2936         else
2937           llvm_unreachable("Unexpected size");
2938       }
2939 
2940       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2941     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2942       bool IsImmediate = Name[16] == 'i' ||
2943                          (Name.size() > 18 && Name[18] == 'i');
2944       bool IsVariable = Name[16] == 'v';
2945       char Size = Name[16] == '.' ? Name[17] :
2946                   Name[17] == '.' ? Name[18] :
2947                   Name[18] == '.' ? Name[19] :
2948                                     Name[20];
2949 
2950       Intrinsic::ID IID;
2951       if (IsVariable && Name[17] != '.') {
2952         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2953           IID = Intrinsic::x86_avx2_psrlv_q;
2954         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2955           IID = Intrinsic::x86_avx2_psrlv_q_256;
2956         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2957           IID = Intrinsic::x86_avx2_psrlv_d;
2958         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2959           IID = Intrinsic::x86_avx2_psrlv_d_256;
2960         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2961           IID = Intrinsic::x86_avx512_psrlv_w_128;
2962         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2963           IID = Intrinsic::x86_avx512_psrlv_w_256;
2964         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2965           IID = Intrinsic::x86_avx512_psrlv_w_512;
2966         else
2967           llvm_unreachable("Unexpected size");
2968       } else if (Name.endswith(".128")) {
2969         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2970           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2971                             : Intrinsic::x86_sse2_psrl_d;
2972         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2973           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2974                             : Intrinsic::x86_sse2_psrl_q;
2975         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2976           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2977                             : Intrinsic::x86_sse2_psrl_w;
2978         else
2979           llvm_unreachable("Unexpected size");
2980       } else if (Name.endswith(".256")) {
2981         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2982           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2983                             : Intrinsic::x86_avx2_psrl_d;
2984         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2985           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2986                             : Intrinsic::x86_avx2_psrl_q;
2987         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2988           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2989                             : Intrinsic::x86_avx2_psrl_w;
2990         else
2991           llvm_unreachable("Unexpected size");
2992       } else {
2993         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2994           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2995                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
2996                               Intrinsic::x86_avx512_psrl_d_512;
2997         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2998           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2999                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
3000                               Intrinsic::x86_avx512_psrl_q_512;
3001         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3002           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3003                             : Intrinsic::x86_avx512_psrl_w_512;
3004         else
3005           llvm_unreachable("Unexpected size");
3006       }
3007 
3008       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3009     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3010       bool IsImmediate = Name[16] == 'i' ||
3011                          (Name.size() > 18 && Name[18] == 'i');
3012       bool IsVariable = Name[16] == 'v';
3013       char Size = Name[16] == '.' ? Name[17] :
3014                   Name[17] == '.' ? Name[18] :
3015                   Name[18] == '.' ? Name[19] :
3016                                     Name[20];
3017 
3018       Intrinsic::ID IID;
3019       if (IsVariable && Name[17] != '.') {
3020         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3021           IID = Intrinsic::x86_avx2_psrav_d;
3022         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3023           IID = Intrinsic::x86_avx2_psrav_d_256;
3024         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3025           IID = Intrinsic::x86_avx512_psrav_w_128;
3026         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3027           IID = Intrinsic::x86_avx512_psrav_w_256;
3028         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3029           IID = Intrinsic::x86_avx512_psrav_w_512;
3030         else
3031           llvm_unreachable("Unexpected size");
3032       } else if (Name.endswith(".128")) {
3033         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3034           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3035                             : Intrinsic::x86_sse2_psra_d;
3036         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3037           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3038                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3039                               Intrinsic::x86_avx512_psra_q_128;
3040         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3041           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3042                             : Intrinsic::x86_sse2_psra_w;
3043         else
3044           llvm_unreachable("Unexpected size");
3045       } else if (Name.endswith(".256")) {
3046         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3047           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3048                             : Intrinsic::x86_avx2_psra_d;
3049         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3050           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3051                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3052                               Intrinsic::x86_avx512_psra_q_256;
3053         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3054           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3055                             : Intrinsic::x86_avx2_psra_w;
3056         else
3057           llvm_unreachable("Unexpected size");
3058       } else {
3059         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3060           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3061                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3062                               Intrinsic::x86_avx512_psra_d_512;
3063         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3064           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3065                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3066                               Intrinsic::x86_avx512_psra_q_512;
3067         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3068           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3069                             : Intrinsic::x86_avx512_psra_w_512;
3070         else
3071           llvm_unreachable("Unexpected size");
3072       }
3073 
3074       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3075     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3076       Rep = upgradeMaskedMove(Builder, *CI);
3077     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3078       Rep = UpgradeMaskToInt(Builder, *CI);
3079     } else if (IsX86 && Name.endswith(".movntdqa")) {
3080       Module *M = F->getParent();
3081       MDNode *Node = MDNode::get(
3082           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3083 
3084       Value *Ptr = CI->getArgOperand(0);
3085 
3086       // Convert the type of the pointer to a pointer to the stored type.
3087       Value *BC = Builder.CreateBitCast(
3088           Ptr, PointerType::getUnqual(CI->getType()), "cast");
3089       LoadInst *LI = Builder.CreateAlignedLoad(
3090           CI->getType(), BC,
3091           Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3092       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3093       Rep = LI;
3094     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3095                          Name.startswith("fma.vfmsub.") ||
3096                          Name.startswith("fma.vfnmadd.") ||
3097                          Name.startswith("fma.vfnmsub."))) {
3098       bool NegMul = Name[6] == 'n';
3099       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3100       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3101 
3102       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3103                        CI->getArgOperand(2) };
3104 
3105       if (IsScalar) {
3106         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3107         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3108         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3109       }
3110 
3111       if (NegMul && !IsScalar)
3112         Ops[0] = Builder.CreateFNeg(Ops[0]);
3113       if (NegMul && IsScalar)
3114         Ops[1] = Builder.CreateFNeg(Ops[1]);
3115       if (NegAcc)
3116         Ops[2] = Builder.CreateFNeg(Ops[2]);
3117 
3118       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3119                                                          Intrinsic::fma,
3120                                                          Ops[0]->getType()),
3121                                Ops);
3122 
3123       if (IsScalar)
3124         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3125                                           (uint64_t)0);
3126     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3127       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3128                        CI->getArgOperand(2) };
3129 
3130       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3131       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3132       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3133 
3134       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3135                                                          Intrinsic::fma,
3136                                                          Ops[0]->getType()),
3137                                Ops);
3138 
3139       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3140                                         Rep, (uint64_t)0);
3141     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3142                          Name.startswith("avx512.maskz.vfmadd.s") ||
3143                          Name.startswith("avx512.mask3.vfmadd.s") ||
3144                          Name.startswith("avx512.mask3.vfmsub.s") ||
3145                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
3146       bool IsMask3 = Name[11] == '3';
3147       bool IsMaskZ = Name[11] == 'z';
3148       // Drop the "avx512.mask." to make it easier.
3149       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3150       bool NegMul = Name[2] == 'n';
3151       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3152 
3153       Value *A = CI->getArgOperand(0);
3154       Value *B = CI->getArgOperand(1);
3155       Value *C = CI->getArgOperand(2);
3156 
3157       if (NegMul && (IsMask3 || IsMaskZ))
3158         A = Builder.CreateFNeg(A);
3159       if (NegMul && !(IsMask3 || IsMaskZ))
3160         B = Builder.CreateFNeg(B);
3161       if (NegAcc)
3162         C = Builder.CreateFNeg(C);
3163 
3164       A = Builder.CreateExtractElement(A, (uint64_t)0);
3165       B = Builder.CreateExtractElement(B, (uint64_t)0);
3166       C = Builder.CreateExtractElement(C, (uint64_t)0);
3167 
3168       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3169           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3170         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3171 
3172         Intrinsic::ID IID;
3173         if (Name.back() == 'd')
3174           IID = Intrinsic::x86_avx512_vfmadd_f64;
3175         else
3176           IID = Intrinsic::x86_avx512_vfmadd_f32;
3177         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3178         Rep = Builder.CreateCall(FMA, Ops);
3179       } else {
3180         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3181                                                   Intrinsic::fma,
3182                                                   A->getType());
3183         Rep = Builder.CreateCall(FMA, { A, B, C });
3184       }
3185 
3186       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3187                         IsMask3 ? C : A;
3188 
3189       // For Mask3 with NegAcc, we need to create a new extractelement that
3190       // avoids the negation above.
3191       if (NegAcc && IsMask3)
3192         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3193                                                 (uint64_t)0);
3194 
3195       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3196                                 Rep, PassThru);
3197       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3198                                         Rep, (uint64_t)0);
3199     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3200                          Name.startswith("avx512.mask.vfnmadd.p") ||
3201                          Name.startswith("avx512.mask.vfnmsub.p") ||
3202                          Name.startswith("avx512.mask3.vfmadd.p") ||
3203                          Name.startswith("avx512.mask3.vfmsub.p") ||
3204                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3205                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3206       bool IsMask3 = Name[11] == '3';
3207       bool IsMaskZ = Name[11] == 'z';
3208       // Drop the "avx512.mask." to make it easier.
3209       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3210       bool NegMul = Name[2] == 'n';
3211       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3212 
3213       Value *A = CI->getArgOperand(0);
3214       Value *B = CI->getArgOperand(1);
3215       Value *C = CI->getArgOperand(2);
3216 
3217       if (NegMul && (IsMask3 || IsMaskZ))
3218         A = Builder.CreateFNeg(A);
3219       if (NegMul && !(IsMask3 || IsMaskZ))
3220         B = Builder.CreateFNeg(B);
3221       if (NegAcc)
3222         C = Builder.CreateFNeg(C);
3223 
3224       if (CI->getNumArgOperands() == 5 &&
3225           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3226            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3227         Intrinsic::ID IID;
3228         // Check the character before ".512" in string.
3229         if (Name[Name.size()-5] == 's')
3230           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3231         else
3232           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3233 
3234         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3235                                  { A, B, C, CI->getArgOperand(4) });
3236       } else {
3237         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3238                                                   Intrinsic::fma,
3239                                                   A->getType());
3240         Rep = Builder.CreateCall(FMA, { A, B, C });
3241       }
3242 
3243       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3244                         IsMask3 ? CI->getArgOperand(2) :
3245                                   CI->getArgOperand(0);
3246 
3247       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3248     } else if (IsX86 &&  Name.startswith("fma.vfmsubadd.p")) {
3249       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3250       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3251       Intrinsic::ID IID;
3252       if (VecWidth == 128 && EltWidth == 32)
3253         IID = Intrinsic::x86_fma_vfmaddsub_ps;
3254       else if (VecWidth == 256 && EltWidth == 32)
3255         IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3256       else if (VecWidth == 128 && EltWidth == 64)
3257         IID = Intrinsic::x86_fma_vfmaddsub_pd;
3258       else if (VecWidth == 256 && EltWidth == 64)
3259         IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3260       else
3261         llvm_unreachable("Unexpected intrinsic");
3262 
3263       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3264                        CI->getArgOperand(2) };
3265       Ops[2] = Builder.CreateFNeg(Ops[2]);
3266       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3267                                Ops);
3268     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3269                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3270                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3271                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3272       bool IsMask3 = Name[11] == '3';
3273       bool IsMaskZ = Name[11] == 'z';
3274       // Drop the "avx512.mask." to make it easier.
3275       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3276       bool IsSubAdd = Name[3] == 's';
3277       if (CI->getNumArgOperands() == 5) {
3278         Intrinsic::ID IID;
3279         // Check the character before ".512" in string.
3280         if (Name[Name.size()-5] == 's')
3281           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3282         else
3283           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3284 
3285         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3286                          CI->getArgOperand(2), CI->getArgOperand(4) };
3287         if (IsSubAdd)
3288           Ops[2] = Builder.CreateFNeg(Ops[2]);
3289 
3290         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3291                                  Ops);
3292       } else {
3293         int NumElts = cast<VectorType>(CI->getType())->getNumElements();
3294 
3295         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3296                          CI->getArgOperand(2) };
3297 
3298         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3299                                                   Ops[0]->getType());
3300         Value *Odd = Builder.CreateCall(FMA, Ops);
3301         Ops[2] = Builder.CreateFNeg(Ops[2]);
3302         Value *Even = Builder.CreateCall(FMA, Ops);
3303 
3304         if (IsSubAdd)
3305           std::swap(Even, Odd);
3306 
3307         SmallVector<int, 32> Idxs(NumElts);
3308         for (int i = 0; i != NumElts; ++i)
3309           Idxs[i] = i + (i % 2) * NumElts;
3310 
3311         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3312       }
3313 
3314       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3315                         IsMask3 ? CI->getArgOperand(2) :
3316                                   CI->getArgOperand(0);
3317 
3318       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3319     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3320                          Name.startswith("avx512.maskz.pternlog."))) {
3321       bool ZeroMask = Name[11] == 'z';
3322       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3323       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3324       Intrinsic::ID IID;
3325       if (VecWidth == 128 && EltWidth == 32)
3326         IID = Intrinsic::x86_avx512_pternlog_d_128;
3327       else if (VecWidth == 256 && EltWidth == 32)
3328         IID = Intrinsic::x86_avx512_pternlog_d_256;
3329       else if (VecWidth == 512 && EltWidth == 32)
3330         IID = Intrinsic::x86_avx512_pternlog_d_512;
3331       else if (VecWidth == 128 && EltWidth == 64)
3332         IID = Intrinsic::x86_avx512_pternlog_q_128;
3333       else if (VecWidth == 256 && EltWidth == 64)
3334         IID = Intrinsic::x86_avx512_pternlog_q_256;
3335       else if (VecWidth == 512 && EltWidth == 64)
3336         IID = Intrinsic::x86_avx512_pternlog_q_512;
3337       else
3338         llvm_unreachable("Unexpected intrinsic");
3339 
3340       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3341                         CI->getArgOperand(2), CI->getArgOperand(3) };
3342       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3343                                Args);
3344       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3345                                  : CI->getArgOperand(0);
3346       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3347     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3348                          Name.startswith("avx512.maskz.vpmadd52"))) {
3349       bool ZeroMask = Name[11] == 'z';
3350       bool High = Name[20] == 'h' || Name[21] == 'h';
3351       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3352       Intrinsic::ID IID;
3353       if (VecWidth == 128 && !High)
3354         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3355       else if (VecWidth == 256 && !High)
3356         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3357       else if (VecWidth == 512 && !High)
3358         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3359       else if (VecWidth == 128 && High)
3360         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3361       else if (VecWidth == 256 && High)
3362         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3363       else if (VecWidth == 512 && High)
3364         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3365       else
3366         llvm_unreachable("Unexpected intrinsic");
3367 
3368       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3369                         CI->getArgOperand(2) };
3370       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3371                                Args);
3372       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3373                                  : CI->getArgOperand(0);
3374       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3375     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3376                          Name.startswith("avx512.mask.vpermt2var.") ||
3377                          Name.startswith("avx512.maskz.vpermt2var."))) {
3378       bool ZeroMask = Name[11] == 'z';
3379       bool IndexForm = Name[17] == 'i';
3380       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3381     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3382                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3383                          Name.startswith("avx512.mask.vpdpbusds.") ||
3384                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3385       bool ZeroMask = Name[11] == 'z';
3386       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3387       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3388       Intrinsic::ID IID;
3389       if (VecWidth == 128 && !IsSaturating)
3390         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3391       else if (VecWidth == 256 && !IsSaturating)
3392         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3393       else if (VecWidth == 512 && !IsSaturating)
3394         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3395       else if (VecWidth == 128 && IsSaturating)
3396         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3397       else if (VecWidth == 256 && IsSaturating)
3398         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3399       else if (VecWidth == 512 && IsSaturating)
3400         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3401       else
3402         llvm_unreachable("Unexpected intrinsic");
3403 
3404       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3405                         CI->getArgOperand(2)  };
3406       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3407                                Args);
3408       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3409                                  : CI->getArgOperand(0);
3410       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3411     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3412                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3413                          Name.startswith("avx512.mask.vpdpwssds.") ||
3414                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3415       bool ZeroMask = Name[11] == 'z';
3416       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3417       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3418       Intrinsic::ID IID;
3419       if (VecWidth == 128 && !IsSaturating)
3420         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3421       else if (VecWidth == 256 && !IsSaturating)
3422         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3423       else if (VecWidth == 512 && !IsSaturating)
3424         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3425       else if (VecWidth == 128 && IsSaturating)
3426         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3427       else if (VecWidth == 256 && IsSaturating)
3428         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3429       else if (VecWidth == 512 && IsSaturating)
3430         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3431       else
3432         llvm_unreachable("Unexpected intrinsic");
3433 
3434       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3435                         CI->getArgOperand(2)  };
3436       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3437                                Args);
3438       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3439                                  : CI->getArgOperand(0);
3440       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3441     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3442                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3443                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3444       Intrinsic::ID IID;
3445       if (Name[0] == 'a' && Name.back() == '2')
3446         IID = Intrinsic::x86_addcarry_32;
3447       else if (Name[0] == 'a' && Name.back() == '4')
3448         IID = Intrinsic::x86_addcarry_64;
3449       else if (Name[0] == 's' && Name.back() == '2')
3450         IID = Intrinsic::x86_subborrow_32;
3451       else if (Name[0] == 's' && Name.back() == '4')
3452         IID = Intrinsic::x86_subborrow_64;
3453       else
3454         llvm_unreachable("Unexpected intrinsic");
3455 
3456       // Make a call with 3 operands.
3457       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3458                         CI->getArgOperand(2)};
3459       Value *NewCall = Builder.CreateCall(
3460                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3461                                 Args);
3462 
3463       // Extract the second result and store it.
3464       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3465       // Cast the pointer to the right type.
3466       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3467                                  llvm::PointerType::getUnqual(Data->getType()));
3468       Builder.CreateAlignedStore(Data, Ptr, Align(1));
3469       // Replace the original call result with the first result of the new call.
3470       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3471 
3472       CI->replaceAllUsesWith(CF);
3473       Rep = nullptr;
3474     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3475                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3476       // Rep will be updated by the call in the condition.
3477     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3478       Value *Arg = CI->getArgOperand(0);
3479       Value *Neg = Builder.CreateNeg(Arg, "neg");
3480       Value *Cmp = Builder.CreateICmpSGE(
3481           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3482       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3483     } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3484                           Name.startswith("atomic.load.add.f64.p"))) {
3485       Value *Ptr = CI->getArgOperand(0);
3486       Value *Val = CI->getArgOperand(1);
3487       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
3488                                     AtomicOrdering::SequentiallyConsistent);
3489     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3490                           Name == "max.ui" || Name == "max.ull")) {
3491       Value *Arg0 = CI->getArgOperand(0);
3492       Value *Arg1 = CI->getArgOperand(1);
3493       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3494                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3495                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3496       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3497     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3498                           Name == "min.ui" || Name == "min.ull")) {
3499       Value *Arg0 = CI->getArgOperand(0);
3500       Value *Arg1 = CI->getArgOperand(1);
3501       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3502                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3503                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3504       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3505     } else if (IsNVVM && Name == "clz.ll") {
3506       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3507       Value *Arg = CI->getArgOperand(0);
3508       Value *Ctlz = Builder.CreateCall(
3509           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3510                                     {Arg->getType()}),
3511           {Arg, Builder.getFalse()}, "ctlz");
3512       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3513     } else if (IsNVVM && Name == "popc.ll") {
3514       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3515       // i64.
3516       Value *Arg = CI->getArgOperand(0);
3517       Value *Popc = Builder.CreateCall(
3518           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3519                                     {Arg->getType()}),
3520           Arg, "ctpop");
3521       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3522     } else if (IsNVVM && Name == "h2f") {
3523       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3524                                    F->getParent(), Intrinsic::convert_from_fp16,
3525                                    {Builder.getFloatTy()}),
3526                                CI->getArgOperand(0), "h2f");
3527     } else {
3528       llvm_unreachable("Unknown function for CallInst upgrade.");
3529     }
3530 
3531     if (Rep)
3532       CI->replaceAllUsesWith(Rep);
3533     CI->eraseFromParent();
3534     return;
3535   }
3536 
3537   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3538     // Handle generic mangling change, but nothing else
3539     assert(
3540         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3541         "Unknown function for CallInst upgrade and isn't just a name change");
3542     CI->setCalledFunction(NewFn);
3543   };
3544   CallInst *NewCall = nullptr;
3545   switch (NewFn->getIntrinsicID()) {
3546   default: {
3547     DefaultCase();
3548     return;
3549   }
3550   case Intrinsic::experimental_vector_reduce_v2_fmul: {
3551     SmallVector<Value *, 2> Args;
3552     if (CI->isFast())
3553       Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
3554     else
3555       Args.push_back(CI->getOperand(0));
3556     Args.push_back(CI->getOperand(1));
3557     NewCall = Builder.CreateCall(NewFn, Args);
3558     cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3559     break;
3560   }
3561   case Intrinsic::experimental_vector_reduce_v2_fadd: {
3562     SmallVector<Value *, 2> Args;
3563     if (CI->isFast())
3564       Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
3565     else
3566       Args.push_back(CI->getOperand(0));
3567     Args.push_back(CI->getOperand(1));
3568     NewCall = Builder.CreateCall(NewFn, Args);
3569     cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3570     break;
3571   }
3572   case Intrinsic::arm_neon_vld1:
3573   case Intrinsic::arm_neon_vld2:
3574   case Intrinsic::arm_neon_vld3:
3575   case Intrinsic::arm_neon_vld4:
3576   case Intrinsic::arm_neon_vld2lane:
3577   case Intrinsic::arm_neon_vld3lane:
3578   case Intrinsic::arm_neon_vld4lane:
3579   case Intrinsic::arm_neon_vst1:
3580   case Intrinsic::arm_neon_vst2:
3581   case Intrinsic::arm_neon_vst3:
3582   case Intrinsic::arm_neon_vst4:
3583   case Intrinsic::arm_neon_vst2lane:
3584   case Intrinsic::arm_neon_vst3lane:
3585   case Intrinsic::arm_neon_vst4lane: {
3586     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3587                                  CI->arg_operands().end());
3588     NewCall = Builder.CreateCall(NewFn, Args);
3589     break;
3590   }
3591 
3592   case Intrinsic::bitreverse:
3593     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3594     break;
3595 
3596   case Intrinsic::ctlz:
3597   case Intrinsic::cttz:
3598     assert(CI->getNumArgOperands() == 1 &&
3599            "Mismatch between function args and call args");
3600     NewCall =
3601         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3602     break;
3603 
3604   case Intrinsic::objectsize: {
3605     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3606                                    ? Builder.getFalse()
3607                                    : CI->getArgOperand(2);
3608     Value *Dynamic =
3609         CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3610     NewCall = Builder.CreateCall(
3611         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3612     break;
3613   }
3614 
3615   case Intrinsic::ctpop:
3616     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3617     break;
3618 
3619   case Intrinsic::convert_from_fp16:
3620     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3621     break;
3622 
3623   case Intrinsic::dbg_value:
3624     // Upgrade from the old version that had an extra offset argument.
3625     assert(CI->getNumArgOperands() == 4);
3626     // Drop nonzero offsets instead of attempting to upgrade them.
3627     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3628       if (Offset->isZeroValue()) {
3629         NewCall = Builder.CreateCall(
3630             NewFn,
3631             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3632         break;
3633       }
3634     CI->eraseFromParent();
3635     return;
3636 
3637   case Intrinsic::x86_xop_vfrcz_ss:
3638   case Intrinsic::x86_xop_vfrcz_sd:
3639     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3640     break;
3641 
3642   case Intrinsic::x86_xop_vpermil2pd:
3643   case Intrinsic::x86_xop_vpermil2ps:
3644   case Intrinsic::x86_xop_vpermil2pd_256:
3645   case Intrinsic::x86_xop_vpermil2ps_256: {
3646     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3647                                  CI->arg_operands().end());
3648     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3649     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3650     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3651     NewCall = Builder.CreateCall(NewFn, Args);
3652     break;
3653   }
3654 
3655   case Intrinsic::x86_sse41_ptestc:
3656   case Intrinsic::x86_sse41_ptestz:
3657   case Intrinsic::x86_sse41_ptestnzc: {
3658     // The arguments for these intrinsics used to be v4f32, and changed
3659     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3660     // So, the only thing required is a bitcast for both arguments.
3661     // First, check the arguments have the old type.
3662     Value *Arg0 = CI->getArgOperand(0);
3663     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3664       return;
3665 
3666     // Old intrinsic, add bitcasts
3667     Value *Arg1 = CI->getArgOperand(1);
3668 
3669     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3670 
3671     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3672     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3673 
3674     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3675     break;
3676   }
3677 
3678   case Intrinsic::x86_rdtscp: {
3679     // This used to take 1 arguments. If we have no arguments, it is already
3680     // upgraded.
3681     if (CI->getNumOperands() == 0)
3682       return;
3683 
3684     NewCall = Builder.CreateCall(NewFn);
3685     // Extract the second result and store it.
3686     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3687     // Cast the pointer to the right type.
3688     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3689                                  llvm::PointerType::getUnqual(Data->getType()));
3690     Builder.CreateAlignedStore(Data, Ptr, Align(1));
3691     // Replace the original call result with the first result of the new call.
3692     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3693 
3694     std::string Name = std::string(CI->getName());
3695     if (!Name.empty()) {
3696       CI->setName(Name + ".old");
3697       NewCall->setName(Name);
3698     }
3699     CI->replaceAllUsesWith(TSC);
3700     CI->eraseFromParent();
3701     return;
3702   }
3703 
3704   case Intrinsic::x86_sse41_insertps:
3705   case Intrinsic::x86_sse41_dppd:
3706   case Intrinsic::x86_sse41_dpps:
3707   case Intrinsic::x86_sse41_mpsadbw:
3708   case Intrinsic::x86_avx_dp_ps_256:
3709   case Intrinsic::x86_avx2_mpsadbw: {
3710     // Need to truncate the last argument from i32 to i8 -- this argument models
3711     // an inherently 8-bit immediate operand to these x86 instructions.
3712     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3713                                  CI->arg_operands().end());
3714 
3715     // Replace the last argument with a trunc.
3716     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3717     NewCall = Builder.CreateCall(NewFn, Args);
3718     break;
3719   }
3720 
3721   case Intrinsic::thread_pointer: {
3722     NewCall = Builder.CreateCall(NewFn, {});
3723     break;
3724   }
3725 
3726   case Intrinsic::invariant_start:
3727   case Intrinsic::invariant_end:
3728   case Intrinsic::masked_load:
3729   case Intrinsic::masked_store:
3730   case Intrinsic::masked_gather:
3731   case Intrinsic::masked_scatter: {
3732     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3733                                  CI->arg_operands().end());
3734     NewCall = Builder.CreateCall(NewFn, Args);
3735     break;
3736   }
3737 
3738   case Intrinsic::memcpy:
3739   case Intrinsic::memmove:
3740   case Intrinsic::memset: {
3741     // We have to make sure that the call signature is what we're expecting.
3742     // We only want to change the old signatures by removing the alignment arg:
3743     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3744     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3745     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3746     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3747     // Note: i8*'s in the above can be any pointer type
3748     if (CI->getNumArgOperands() != 5) {
3749       DefaultCase();
3750       return;
3751     }
3752     // Remove alignment argument (3), and add alignment attributes to the
3753     // dest/src pointers.
3754     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3755                       CI->getArgOperand(2), CI->getArgOperand(4)};
3756     NewCall = Builder.CreateCall(NewFn, Args);
3757     auto *MemCI = cast<MemIntrinsic>(NewCall);
3758     // All mem intrinsics support dest alignment.
3759     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3760     MemCI->setDestAlignment(Align->getMaybeAlignValue());
3761     // Memcpy/Memmove also support source alignment.
3762     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3763       MTI->setSourceAlignment(Align->getMaybeAlignValue());
3764     break;
3765   }
3766   }
3767   assert(NewCall && "Should have either set this variable or returned through "
3768                     "the default case");
3769   std::string Name = std::string(CI->getName());
3770   if (!Name.empty()) {
3771     CI->setName(Name + ".old");
3772     NewCall->setName(Name);
3773   }
3774   CI->replaceAllUsesWith(NewCall);
3775   CI->eraseFromParent();
3776 }
3777 
3778 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3779   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3780 
3781   // Check if this function should be upgraded and get the replacement function
3782   // if there is one.
3783   Function *NewFn;
3784   if (UpgradeIntrinsicFunction(F, NewFn)) {
3785     // Replace all users of the old function with the new function or new
3786     // instructions. This is not a range loop because the call is deleted.
3787     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3788       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3789         UpgradeIntrinsicCall(CI, NewFn);
3790 
3791     // Remove old function, no longer used, from the module.
3792     F->eraseFromParent();
3793   }
3794 }
3795 
3796 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3797   // Check if the tag uses struct-path aware TBAA format.
3798   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3799     return &MD;
3800 
3801   auto &Context = MD.getContext();
3802   if (MD.getNumOperands() == 3) {
3803     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3804     MDNode *ScalarType = MDNode::get(Context, Elts);
3805     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3806     Metadata *Elts2[] = {ScalarType, ScalarType,
3807                          ConstantAsMetadata::get(
3808                              Constant::getNullValue(Type::getInt64Ty(Context))),
3809                          MD.getOperand(2)};
3810     return MDNode::get(Context, Elts2);
3811   }
3812   // Create a MDNode <MD, MD, offset 0>
3813   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3814                                     Type::getInt64Ty(Context)))};
3815   return MDNode::get(Context, Elts);
3816 }
3817 
3818 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3819                                       Instruction *&Temp) {
3820   if (Opc != Instruction::BitCast)
3821     return nullptr;
3822 
3823   Temp = nullptr;
3824   Type *SrcTy = V->getType();
3825   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3826       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3827     LLVMContext &Context = V->getContext();
3828 
3829     // We have no information about target data layout, so we assume that
3830     // the maximum pointer size is 64bit.
3831     Type *MidTy = Type::getInt64Ty(Context);
3832     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3833 
3834     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3835   }
3836 
3837   return nullptr;
3838 }
3839 
3840 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3841   if (Opc != Instruction::BitCast)
3842     return nullptr;
3843 
3844   Type *SrcTy = C->getType();
3845   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3846       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3847     LLVMContext &Context = C->getContext();
3848 
3849     // We have no information about target data layout, so we assume that
3850     // the maximum pointer size is 64bit.
3851     Type *MidTy = Type::getInt64Ty(Context);
3852 
3853     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3854                                      DestTy);
3855   }
3856 
3857   return nullptr;
3858 }
3859 
3860 /// Check the debug info version number, if it is out-dated, drop the debug
3861 /// info. Return true if module is modified.
3862 bool llvm::UpgradeDebugInfo(Module &M) {
3863   unsigned Version = getDebugMetadataVersionFromModule(M);
3864   if (Version == DEBUG_METADATA_VERSION) {
3865     bool BrokenDebugInfo = false;
3866     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3867       report_fatal_error("Broken module found, compilation aborted!");
3868     if (!BrokenDebugInfo)
3869       // Everything is ok.
3870       return false;
3871     else {
3872       // Diagnose malformed debug info.
3873       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3874       M.getContext().diagnose(Diag);
3875     }
3876   }
3877   bool Modified = StripDebugInfo(M);
3878   if (Modified && Version != DEBUG_METADATA_VERSION) {
3879     // Diagnose a version mismatch.
3880     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3881     M.getContext().diagnose(DiagVersion);
3882   }
3883   return Modified;
3884 }
3885 
3886 /// This checks for objc retain release marker which should be upgraded. It
3887 /// returns true if module is modified.
3888 static bool UpgradeRetainReleaseMarker(Module &M) {
3889   bool Changed = false;
3890   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3891   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3892   if (ModRetainReleaseMarker) {
3893     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3894     if (Op) {
3895       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3896       if (ID) {
3897         SmallVector<StringRef, 4> ValueComp;
3898         ID->getString().split(ValueComp, "#");
3899         if (ValueComp.size() == 2) {
3900           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3901           ID = MDString::get(M.getContext(), NewValue);
3902         }
3903         M.addModuleFlag(Module::Error, MarkerKey, ID);
3904         M.eraseNamedMetadata(ModRetainReleaseMarker);
3905         Changed = true;
3906       }
3907     }
3908   }
3909   return Changed;
3910 }
3911 
3912 void llvm::UpgradeARCRuntime(Module &M) {
3913   // This lambda converts normal function calls to ARC runtime functions to
3914   // intrinsic calls.
3915   auto UpgradeToIntrinsic = [&](const char *OldFunc,
3916                                 llvm::Intrinsic::ID IntrinsicFunc) {
3917     Function *Fn = M.getFunction(OldFunc);
3918 
3919     if (!Fn)
3920       return;
3921 
3922     Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
3923 
3924     for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
3925       CallInst *CI = dyn_cast<CallInst>(*I++);
3926       if (!CI || CI->getCalledFunction() != Fn)
3927         continue;
3928 
3929       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
3930       FunctionType *NewFuncTy = NewFn->getFunctionType();
3931       SmallVector<Value *, 2> Args;
3932 
3933       // Don't upgrade the intrinsic if it's not valid to bitcast the return
3934       // value to the return type of the old function.
3935       if (NewFuncTy->getReturnType() != CI->getType() &&
3936           !CastInst::castIsValid(Instruction::BitCast, CI,
3937                                  NewFuncTy->getReturnType()))
3938         continue;
3939 
3940       bool InvalidCast = false;
3941 
3942       for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
3943         Value *Arg = CI->getArgOperand(I);
3944 
3945         // Bitcast argument to the parameter type of the new function if it's
3946         // not a variadic argument.
3947         if (I < NewFuncTy->getNumParams()) {
3948           // Don't upgrade the intrinsic if it's not valid to bitcast the argument
3949           // to the parameter type of the new function.
3950           if (!CastInst::castIsValid(Instruction::BitCast, Arg,
3951                                      NewFuncTy->getParamType(I))) {
3952             InvalidCast = true;
3953             break;
3954           }
3955           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
3956         }
3957         Args.push_back(Arg);
3958       }
3959 
3960       if (InvalidCast)
3961         continue;
3962 
3963       // Create a call instruction that calls the new function.
3964       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
3965       NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
3966       NewCall->setName(CI->getName());
3967 
3968       // Bitcast the return value back to the type of the old call.
3969       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
3970 
3971       if (!CI->use_empty())
3972         CI->replaceAllUsesWith(NewRetVal);
3973       CI->eraseFromParent();
3974     }
3975 
3976     if (Fn->use_empty())
3977       Fn->eraseFromParent();
3978   };
3979 
3980   // Unconditionally convert a call to "clang.arc.use" to a call to
3981   // "llvm.objc.clang.arc.use".
3982   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
3983 
3984   // Upgrade the retain release marker. If there is no need to upgrade
3985   // the marker, that means either the module is already new enough to contain
3986   // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
3987   if (!UpgradeRetainReleaseMarker(M))
3988     return;
3989 
3990   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
3991       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
3992       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
3993       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
3994       {"objc_autoreleaseReturnValue",
3995        llvm::Intrinsic::objc_autoreleaseReturnValue},
3996       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
3997       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
3998       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
3999       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4000       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4001       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4002       {"objc_release", llvm::Intrinsic::objc_release},
4003       {"objc_retain", llvm::Intrinsic::objc_retain},
4004       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4005       {"objc_retainAutoreleaseReturnValue",
4006        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4007       {"objc_retainAutoreleasedReturnValue",
4008        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4009       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4010       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4011       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4012       {"objc_unsafeClaimAutoreleasedReturnValue",
4013        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4014       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4015       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4016       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4017       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4018       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4019       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4020       {"objc_arc_annotation_topdown_bbstart",
4021        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4022       {"objc_arc_annotation_topdown_bbend",
4023        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4024       {"objc_arc_annotation_bottomup_bbstart",
4025        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4026       {"objc_arc_annotation_bottomup_bbend",
4027        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4028 
4029   for (auto &I : RuntimeFuncs)
4030     UpgradeToIntrinsic(I.first, I.second);
4031 }
4032 
4033 bool llvm::UpgradeModuleFlags(Module &M) {
4034   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4035   if (!ModFlags)
4036     return false;
4037 
4038   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4039   bool HasSwiftVersionFlag = false;
4040   uint8_t SwiftMajorVersion, SwiftMinorVersion;
4041   uint32_t SwiftABIVersion;
4042   auto Int8Ty = Type::getInt8Ty(M.getContext());
4043   auto Int32Ty = Type::getInt32Ty(M.getContext());
4044 
4045   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4046     MDNode *Op = ModFlags->getOperand(I);
4047     if (Op->getNumOperands() != 3)
4048       continue;
4049     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4050     if (!ID)
4051       continue;
4052     if (ID->getString() == "Objective-C Image Info Version")
4053       HasObjCFlag = true;
4054     if (ID->getString() == "Objective-C Class Properties")
4055       HasClassProperties = true;
4056     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4057     // field was Error and now they are Max.
4058     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4059       if (auto *Behavior =
4060               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4061         if (Behavior->getLimitedValue() == Module::Error) {
4062           Type *Int32Ty = Type::getInt32Ty(M.getContext());
4063           Metadata *Ops[3] = {
4064               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4065               MDString::get(M.getContext(), ID->getString()),
4066               Op->getOperand(2)};
4067           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4068           Changed = true;
4069         }
4070       }
4071     }
4072     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
4073     // section name so that llvm-lto will not complain about mismatching
4074     // module flags that is functionally the same.
4075     if (ID->getString() == "Objective-C Image Info Section") {
4076       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4077         SmallVector<StringRef, 4> ValueComp;
4078         Value->getString().split(ValueComp, " ");
4079         if (ValueComp.size() != 1) {
4080           std::string NewValue;
4081           for (auto &S : ValueComp)
4082             NewValue += S.str();
4083           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4084                               MDString::get(M.getContext(), NewValue)};
4085           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4086           Changed = true;
4087         }
4088       }
4089     }
4090 
4091     // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
4092     // If the higher bits are set, it adds new module flag for swift info.
4093     if (ID->getString() == "Objective-C Garbage Collection") {
4094       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
4095       if (Md) {
4096         assert(Md->getValue() && "Expected non-empty metadata");
4097         auto Type = Md->getValue()->getType();
4098         if (Type == Int8Ty)
4099           continue;
4100         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
4101         if ((Val & 0xff) != Val) {
4102           HasSwiftVersionFlag = true;
4103           SwiftABIVersion = (Val & 0xff00) >> 8;
4104           SwiftMajorVersion = (Val & 0xff000000) >> 24;
4105           SwiftMinorVersion = (Val & 0xff0000) >> 16;
4106         }
4107         Metadata *Ops[3] = {
4108           ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
4109           Op->getOperand(1),
4110           ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
4111         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4112         Changed = true;
4113       }
4114     }
4115   }
4116 
4117   // "Objective-C Class Properties" is recently added for Objective-C. We
4118   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4119   // flag of value 0, so we can correclty downgrade this flag when trying to
4120   // link an ObjC bitcode without this module flag with an ObjC bitcode with
4121   // this module flag.
4122   if (HasObjCFlag && !HasClassProperties) {
4123     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
4124                     (uint32_t)0);
4125     Changed = true;
4126   }
4127 
4128   if (HasSwiftVersionFlag) {
4129     M.addModuleFlag(Module::Error, "Swift ABI Version",
4130                     SwiftABIVersion);
4131     M.addModuleFlag(Module::Error, "Swift Major Version",
4132                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
4133     M.addModuleFlag(Module::Error, "Swift Minor Version",
4134                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
4135     Changed = true;
4136   }
4137 
4138   return Changed;
4139 }
4140 
4141 void llvm::UpgradeSectionAttributes(Module &M) {
4142   auto TrimSpaces = [](StringRef Section) -> std::string {
4143     SmallVector<StringRef, 5> Components;
4144     Section.split(Components, ',');
4145 
4146     SmallString<32> Buffer;
4147     raw_svector_ostream OS(Buffer);
4148 
4149     for (auto Component : Components)
4150       OS << ',' << Component.trim();
4151 
4152     return std::string(OS.str().substr(1));
4153   };
4154 
4155   for (auto &GV : M.globals()) {
4156     if (!GV.hasSection())
4157       continue;
4158 
4159     StringRef Section = GV.getSection();
4160 
4161     if (!Section.startswith("__DATA, __objc_catlist"))
4162       continue;
4163 
4164     // __DATA, __objc_catlist, regular, no_dead_strip
4165     // __DATA,__objc_catlist,regular,no_dead_strip
4166     GV.setSection(TrimSpaces(Section));
4167   }
4168 }
4169 
4170 namespace {
4171 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
4172 // callsites within a function that did not also have the strictfp attribute.
4173 // Since 10.0, if strict FP semantics are needed within a function, the
4174 // function must have the strictfp attribute and all calls within the function
4175 // must also have the strictfp attribute. This latter restriction is
4176 // necessary to prevent unwanted libcall simplification when a function is
4177 // being cloned (such as for inlining).
4178 //
4179 // The "dangling" strictfp attribute usage was only used to prevent constant
4180 // folding and other libcall simplification. The nobuiltin attribute on the
4181 // callsite has the same effect.
4182 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
4183   StrictFPUpgradeVisitor() {}
4184 
4185   void visitCallBase(CallBase &Call) {
4186     if (!Call.isStrictFP())
4187       return;
4188     if (isa<ConstrainedFPIntrinsic>(&Call))
4189       return;
4190     // If we get here, the caller doesn't have the strictfp attribute
4191     // but this callsite does. Replace the strictfp attribute with nobuiltin.
4192     Call.removeAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
4193     Call.addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
4194   }
4195 };
4196 } // namespace
4197 
4198 void llvm::UpgradeFunctionAttributes(Function &F) {
4199   // If a function definition doesn't have the strictfp attribute,
4200   // convert any callsite strictfp attributes to nobuiltin.
4201   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
4202     StrictFPUpgradeVisitor SFPV;
4203     SFPV.visit(F);
4204   }
4205 }
4206 
4207 static bool isOldLoopArgument(Metadata *MD) {
4208   auto *T = dyn_cast_or_null<MDTuple>(MD);
4209   if (!T)
4210     return false;
4211   if (T->getNumOperands() < 1)
4212     return false;
4213   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4214   if (!S)
4215     return false;
4216   return S->getString().startswith("llvm.vectorizer.");
4217 }
4218 
4219 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4220   StringRef OldPrefix = "llvm.vectorizer.";
4221   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4222 
4223   if (OldTag == "llvm.vectorizer.unroll")
4224     return MDString::get(C, "llvm.loop.interleave.count");
4225 
4226   return MDString::get(
4227       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4228              .str());
4229 }
4230 
4231 static Metadata *upgradeLoopArgument(Metadata *MD) {
4232   auto *T = dyn_cast_or_null<MDTuple>(MD);
4233   if (!T)
4234     return MD;
4235   if (T->getNumOperands() < 1)
4236     return MD;
4237   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4238   if (!OldTag)
4239     return MD;
4240   if (!OldTag->getString().startswith("llvm.vectorizer."))
4241     return MD;
4242 
4243   // This has an old tag.  Upgrade it.
4244   SmallVector<Metadata *, 8> Ops;
4245   Ops.reserve(T->getNumOperands());
4246   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4247   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4248     Ops.push_back(T->getOperand(I));
4249 
4250   return MDTuple::get(T->getContext(), Ops);
4251 }
4252 
4253 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4254   auto *T = dyn_cast<MDTuple>(&N);
4255   if (!T)
4256     return &N;
4257 
4258   if (none_of(T->operands(), isOldLoopArgument))
4259     return &N;
4260 
4261   SmallVector<Metadata *, 8> Ops;
4262   Ops.reserve(T->getNumOperands());
4263   for (Metadata *MD : T->operands())
4264     Ops.push_back(upgradeLoopArgument(MD));
4265 
4266   return MDTuple::get(T->getContext(), Ops);
4267 }
4268 
4269 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4270   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4271 
4272   // If X86, and the datalayout matches the expected format, add pointer size
4273   // address spaces to the datalayout.
4274   if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
4275     return std::string(DL);
4276 
4277   SmallVector<StringRef, 4> Groups;
4278   Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4279   if (!R.match(DL, &Groups))
4280     return std::string(DL);
4281 
4282   SmallString<1024> Buf;
4283   std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
4284   return Res;
4285 }
4286 
4287 void llvm::UpgradeAttributes(AttrBuilder &B) {
4288   StringRef FramePointer;
4289   if (B.contains("no-frame-pointer-elim")) {
4290     // The value can be "true" or "false".
4291     for (const auto &I : B.td_attrs())
4292       if (I.first == "no-frame-pointer-elim")
4293         FramePointer = I.second == "true" ? "all" : "none";
4294     B.removeAttribute("no-frame-pointer-elim");
4295   }
4296   if (B.contains("no-frame-pointer-elim-non-leaf")) {
4297     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4298     if (FramePointer != "all")
4299       FramePointer = "non-leaf";
4300     B.removeAttribute("no-frame-pointer-elim-non-leaf");
4301   }
4302   if (!FramePointer.empty())
4303     B.addAttribute("frame-pointer", FramePointer);
4304 
4305   if (B.contains("null-pointer-is-valid")) {
4306     // The value can be "true" or "false".
4307     bool NullPointerIsValid = false;
4308     for (const auto &I : B.td_attrs())
4309       if (I.first == "null-pointer-is-valid")
4310         NullPointerIsValid = I.second == "true";
4311     B.removeAttribute("null-pointer-is-valid");
4312     if (NullPointerIsValid)
4313       B.addAttribute(Attribute::NullPointerIsValid);
4314   }
4315 }
4316