xref: /freebsd/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision 9dba64be9536c28e4800e06512b7f29b43ade345)
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Verifier.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Regex.h"
30 #include <cstring>
31 using namespace llvm;
32 
33 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
34 
35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
36 // changed their type from v4f32 to v2i64.
37 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
38                                   Function *&NewFn) {
39   // Check whether this is an old version of the function, which received
40   // v4f32 arguments.
41   Type *Arg0Type = F->getFunctionType()->getParamType(0);
42   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43     return false;
44 
45   // Yes, it's old, replace it with new version.
46   rename(F);
47   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
48   return true;
49 }
50 
51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
52 // arguments have changed their type from i32 to i8.
53 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54                                              Function *&NewFn) {
55   // Check that the last argument is an i32.
56   Type *LastArgType = F->getFunctionType()->getParamType(
57      F->getFunctionType()->getNumParams() - 1);
58   if (!LastArgType->isIntegerTy(32))
59     return false;
60 
61   // Move this function aside and map down.
62   rename(F);
63   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
64   return true;
65 }
66 
// Returns true if \p Name (the intrinsic name with the "x86." prefix already
// stripped by the caller) identifies a legacy X86 intrinsic whose *call sites*
// must be rewritten by the auto-upgrader. For these, the caller reports a null
// NewFn rather than a replacement declaration (see UpgradeX86IntrinsicFunction).
// This is a pure string-matching table; \p F is currently unused.
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name=="ssse3.pabs.b.128" || // Added in 6.0
      Name=="ssse3.pabs.w.128" || // Added in 6.0
      Name=="ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || //added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
      Name.startswith("avx512.ptestm") || //Added in 6.0
      Name.startswith("avx512.ptestnm") || //Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  // Not a name we recognize as needing call-site rewriting.
  return false;
}
393 
// Dispatch upgrades for the "llvm.x86.*" intrinsic family.
//
// Returns false if \p Name is not an X86 intrinsic or needs no upgrade.
// Returns true with NewFn == nullptr when the match came from the big
// ShouldUpgradeX86Intrinsic table — in that case the caller is expected to
// rewrite each call site individually rather than swap declarations.
// Returns true with NewFn set when a direct replacement declaration exists.
// NOTE(review): check order matters here — the table lookup runs before the
// special cases below.
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  // Intrinsics handled by rewriting the call sites: report a null NewFn.
  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    // Old version took an output pointer operand; map to the modern,
    // parameterless declaration.
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  // Select by suffix ("c"/"z"/"nzc", starting at offset 11 after
  // "sse41.ptest").
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  // (The two-argument form is the legacy one.)
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  // The replacement intrinsic is chosen from the element size (ps vs pd) and
  // the total vector width (128 vs 256) of the old FP index operand.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  // Windows SEH: llvm.x86.seh.recoverfp became the target-independent
  // llvm.eh.recoverfp.
  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}
489 
490 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
491   assert(F && "Illegal to upgrade a non-existent Function.");
492 
493   // Quickly eliminate it, if it's not a candidate.
494   StringRef Name = F->getName();
495   if (Name.size() <= 8 || !Name.startswith("llvm."))
496     return false;
497   Name = Name.substr(5); // Strip off "llvm."
498 
499   switch (Name[0]) {
500   default: break;
501   case 'a': {
502     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
503       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
504                                         F->arg_begin()->getType());
505       return true;
506     }
507     if (Name.startswith("arm.neon.vclz")) {
508       Type* args[2] = {
509         F->arg_begin()->getType(),
510         Type::getInt1Ty(F->getContext())
511       };
512       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
513       // the end of the name. Change name from llvm.arm.neon.vclz.* to
514       //  llvm.ctlz.*
515       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
516       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
517                                "llvm.ctlz." + Name.substr(14), F->getParent());
518       return true;
519     }
520     if (Name.startswith("arm.neon.vcnt")) {
521       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
522                                         F->arg_begin()->getType());
523       return true;
524     }
525     static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
526     if (vldRegex.match(Name)) {
527       auto fArgs = F->getFunctionType()->params();
528       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
529       // Can't use Intrinsic::getDeclaration here as the return types might
530       // then only be structurally equal.
531       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
532       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
533                                "llvm." + Name + ".p0i8", F->getParent());
534       return true;
535     }
536     static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
537     if (vstRegex.match(Name)) {
538       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
539                                                 Intrinsic::arm_neon_vst2,
540                                                 Intrinsic::arm_neon_vst3,
541                                                 Intrinsic::arm_neon_vst4};
542 
543       static const Intrinsic::ID StoreLaneInts[] = {
544         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
545         Intrinsic::arm_neon_vst4lane
546       };
547 
548       auto fArgs = F->getFunctionType()->params();
549       Type *Tys[] = {fArgs[0], fArgs[1]};
550       if (Name.find("lane") == StringRef::npos)
551         NewFn = Intrinsic::getDeclaration(F->getParent(),
552                                           StoreInts[fArgs.size() - 3], Tys);
553       else
554         NewFn = Intrinsic::getDeclaration(F->getParent(),
555                                           StoreLaneInts[fArgs.size() - 5], Tys);
556       return true;
557     }
558     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
559       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
560       return true;
561     }
562     if (Name.startswith("aarch64.neon.addp")) {
563       if (F->arg_size() != 2)
564         break; // Invalid IR.
565       auto fArgs = F->getFunctionType()->params();
566       VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
567       if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
568         NewFn = Intrinsic::getDeclaration(F->getParent(),
569                                           Intrinsic::aarch64_neon_faddp, fArgs);
570         return true;
571       }
572     }
573     break;
574   }
575 
576   case 'c': {
577     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
578       rename(F);
579       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
580                                         F->arg_begin()->getType());
581       return true;
582     }
583     if (Name.startswith("cttz.") && F->arg_size() == 1) {
584       rename(F);
585       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
586                                         F->arg_begin()->getType());
587       return true;
588     }
589     break;
590   }
591   case 'd': {
592     if (Name == "dbg.value" && F->arg_size() == 4) {
593       rename(F);
594       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
595       return true;
596     }
597     break;
598   }
599   case 'e': {
600     SmallVector<StringRef, 2> Groups;
601     static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
602     if (R.match(Name, &Groups)) {
603       Intrinsic::ID ID = Intrinsic::not_intrinsic;
604       if (Groups[1] == "fadd")
605         ID = Intrinsic::experimental_vector_reduce_v2_fadd;
606       if (Groups[1] == "fmul")
607         ID = Intrinsic::experimental_vector_reduce_v2_fmul;
608 
609       if (ID != Intrinsic::not_intrinsic) {
610         rename(F);
611         auto Args = F->getFunctionType()->params();
612         Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
613         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
614         return true;
615       }
616     }
617     break;
618   }
619   case 'i':
620   case 'l': {
621     bool IsLifetimeStart = Name.startswith("lifetime.start");
622     if (IsLifetimeStart || Name.startswith("invariant.start")) {
623       Intrinsic::ID ID = IsLifetimeStart ?
624         Intrinsic::lifetime_start : Intrinsic::invariant_start;
625       auto Args = F->getFunctionType()->params();
626       Type* ObjectPtr[1] = {Args[1]};
627       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
628         rename(F);
629         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
630         return true;
631       }
632     }
633 
634     bool IsLifetimeEnd = Name.startswith("lifetime.end");
635     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
636       Intrinsic::ID ID = IsLifetimeEnd ?
637         Intrinsic::lifetime_end : Intrinsic::invariant_end;
638 
639       auto Args = F->getFunctionType()->params();
640       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
641       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
642         rename(F);
643         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
644         return true;
645       }
646     }
647     if (Name.startswith("invariant.group.barrier")) {
648       // Rename invariant.group.barrier to launder.invariant.group
649       auto Args = F->getFunctionType()->params();
650       Type* ObjectPtr[1] = {Args[0]};
651       rename(F);
652       NewFn = Intrinsic::getDeclaration(F->getParent(),
653           Intrinsic::launder_invariant_group, ObjectPtr);
654       return true;
655 
656     }
657 
658     break;
659   }
660   case 'm': {
661     if (Name.startswith("masked.load.")) {
662       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
663       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
664         rename(F);
665         NewFn = Intrinsic::getDeclaration(F->getParent(),
666                                           Intrinsic::masked_load,
667                                           Tys);
668         return true;
669       }
670     }
671     if (Name.startswith("masked.store.")) {
672       auto Args = F->getFunctionType()->params();
673       Type *Tys[] = { Args[0], Args[1] };
674       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
675         rename(F);
676         NewFn = Intrinsic::getDeclaration(F->getParent(),
677                                           Intrinsic::masked_store,
678                                           Tys);
679         return true;
680       }
681     }
682     // Renaming gather/scatter intrinsics with no address space overloading
683     // to the new overload which includes an address space
684     if (Name.startswith("masked.gather.")) {
685       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
686       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
687         rename(F);
688         NewFn = Intrinsic::getDeclaration(F->getParent(),
689                                           Intrinsic::masked_gather, Tys);
690         return true;
691       }
692     }
693     if (Name.startswith("masked.scatter.")) {
694       auto Args = F->getFunctionType()->params();
695       Type *Tys[] = {Args[0], Args[1]};
696       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
697         rename(F);
698         NewFn = Intrinsic::getDeclaration(F->getParent(),
699                                           Intrinsic::masked_scatter, Tys);
700         return true;
701       }
702     }
703     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
704     // alignment parameter to embedding the alignment as an attribute of
705     // the pointer args.
706     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
707       rename(F);
708       // Get the types of dest, src, and len
709       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
710       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
711                                         ParamTypes);
712       return true;
713     }
714     if (Name.startswith("memmove.") && F->arg_size() == 5) {
715       rename(F);
716       // Get the types of dest, src, and len
717       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
718       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
719                                         ParamTypes);
720       return true;
721     }
722     if (Name.startswith("memset.") && F->arg_size() == 5) {
723       rename(F);
724       // Get the types of dest, and len
725       const auto *FT = F->getFunctionType();
726       Type *ParamTypes[2] = {
727           FT->getParamType(0), // Dest
728           FT->getParamType(2)  // len
729       };
730       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
731                                         ParamTypes);
732       return true;
733     }
734     break;
735   }
736   case 'n': {
737     if (Name.startswith("nvvm.")) {
738       Name = Name.substr(5);
739 
740       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
741       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
742                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
743                               .Case("clz.i", Intrinsic::ctlz)
744                               .Case("popc.i", Intrinsic::ctpop)
745                               .Default(Intrinsic::not_intrinsic);
746       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
747         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
748                                           {F->getReturnType()});
749         return true;
750       }
751 
752       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
753       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
754       //
755       // TODO: We could add lohi.i2d.
756       bool Expand = StringSwitch<bool>(Name)
757                         .Cases("abs.i", "abs.ll", true)
758                         .Cases("clz.ll", "popc.ll", "h2f", true)
759                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
760                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
761                         .StartsWith("atomic.load.add.f32.p", true)
762                         .StartsWith("atomic.load.add.f64.p", true)
763                         .Default(false);
764       if (Expand) {
765         NewFn = nullptr;
766         return true;
767       }
768     }
769     break;
770   }
771   case 'o':
772     // We only need to change the name to match the mangling including the
773     // address space.
774     if (Name.startswith("objectsize.")) {
775       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
776       if (F->arg_size() == 2 || F->arg_size() == 3 ||
777           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
778         rename(F);
779         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
780                                           Tys);
781         return true;
782       }
783     }
784     break;
785 
786   case 'p':
787     if (Name == "prefetch") {
788       // Handle address space overloading.
789       Type *Tys[] = {F->arg_begin()->getType()};
790       if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
791         rename(F);
792         NewFn =
793             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
794         return true;
795       }
796     }
797     break;
798 
799   case 's':
800     if (Name == "stackprotectorcheck") {
801       NewFn = nullptr;
802       return true;
803     }
804     break;
805 
806   case 'x':
807     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
808       return true;
809   }
810   // Remangle our intrinsic since we upgrade the mangling
811   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
812   if (Result != None) {
813     NewFn = Result.getValue();
814     return true;
815   }
816 
817   //  This may not belong here. This function is effectively being overloaded
818   //  to both detect an intrinsic which needs upgrading, and to provide the
819   //  upgraded form of the intrinsic. We should perhaps have two separate
820   //  functions for this.
821   return false;
822 }
823 
824 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
825   NewFn = nullptr;
826   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
827   assert(F != NewFn && "Intrinsic function upgraded to the same function");
828 
829   // Upgrade intrinsic attributes.  This does not change the function.
830   if (NewFn)
831     F = NewFn;
832   if (Intrinsic::ID id = F->getIntrinsicID())
833     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
834   return Upgraded;
835 }
836 
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Upgrade llvm.global_ctors / llvm.global_dtors entries from the old
  // two-field element type { i32, void()* } to the current three-field form,
  // appending an i8* field initialized to null. Returns the replacement
  // variable, or nullptr when no upgrade is needed.
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  // Two struct fields means the old format; anything else is left alone.
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  // New element type: the original two fields plus a trailing i8*.
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    // Copy both original fields and null out the new pointer field.
    NewCtors[i] = ConstantStruct::get(
        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
        Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  // NOTE(review): this constructor overload creates the variable without
  // inserting it into a module; presumably the caller replaces GV with the
  // returned variable — confirm against the call site.
  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
867 
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  // Total byte count: the operand is a vector of 64-bit elements (8 bytes
  // each).
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // In the shuffle, indices < NumElts select from Res (the zero
        // vector, operand 0) and indices >= NumElts select from Op
        // (operand 1), effectively shifting each 16-byte lane left by
        // 'Shift' bytes and filling the low bytes with zeros.
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
901 
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  // Total byte count: the operand is a vector of 64-bit elements (8 bytes
  // each).
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // In the shuffle, indices < NumElts select from Op (operand 0) and
        // indices >= NumElts select from the zero vector (operand 1),
        // shifting each 16-byte lane right by 'Shift' bytes and filling the
        // high bytes with zeros.
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
935 
936 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
937                             unsigned NumElts) {
938   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
939                              cast<IntegerType>(Mask->getType())->getBitWidth());
940   Mask = Builder.CreateBitCast(Mask, MaskTy);
941 
942   // If we have less than 8 elements, then the starting mask was an i8 and
943   // we need to extract down to the right number of elements.
944   if (NumElts < 8) {
945     uint32_t Indices[4];
946     for (unsigned i = 0; i != NumElts; ++i)
947       Indices[i] = i;
948     Mask = Builder.CreateShuffleVector(Mask, Mask,
949                                        makeArrayRef(Indices, NumElts),
950                                        "extract");
951   }
952 
953   return Mask;
954 }
955 
956 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
957                             Value *Op0, Value *Op1) {
958   // If the mask is all ones just emit the first operation.
959   if (const auto *C = dyn_cast<Constant>(Mask))
960     if (C->isAllOnesValue())
961       return Op0;
962 
963   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
964   return Builder.CreateSelect(Mask, Op0, Op1);
965 }
966 
967 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
968                                   Value *Op0, Value *Op1) {
969   // If the mask is all ones just emit the first operation.
970   if (const auto *C = dyn_cast<Constant>(Mask))
971     if (C->isAllOnesValue())
972       return Op0;
973 
974   llvm::VectorType *MaskTy =
975     llvm::VectorType::get(Builder.getInt1Ty(),
976                           Mask->getType()->getIntegerBitWidth());
977   Mask = Builder.CreateBitCast(Mask, MaskTy);
978   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
979   return Builder.CreateSelect(Mask, Op0, Op1);
980 }
981 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount is an immediate operand, so it is always a ConstantInt.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      // Select from the concatenation {Op1, Op0}: shuffle indices < NumElts
      // read Op1 (operand 0), indices >= NumElts read Op0 (operand 1).
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  // Apply the writemask: lanes with a clear mask bit come from Passthru.
  return EmitX86Select(Builder, Mask, Align, Passthru);
}
1030 
// Upgrade masked VPERMT2/VPERMI2 two-source permute intrinsics to the
// unmasked vpermi2var intrinsics plus an explicit select for the writemask.
// The exact intrinsic is chosen by vector width, element width, and whether
// the element type is floating point.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  // Zero-masked variants pass through zero; otherwise the passthru is the
  // original operand 1 (which may need a bitcast, e.g. the index vector of a
  // float permute is an integer vector).
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1091 
1092 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1093                                             bool IsSigned, bool IsAddition) {
1094   Type *Ty = CI.getType();
1095   Value *Op0 = CI.getOperand(0);
1096   Value *Op1 = CI.getOperand(1);
1097 
1098   Intrinsic::ID IID =
1099       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1100                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1101   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1102   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1103 
1104   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1105     Value *VecSrc = CI.getOperand(2);
1106     Value *Mask = CI.getOperand(3);
1107     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1108   }
1109   return Res;
1110 }
1111 
1112 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1113                                bool IsRotateRight) {
1114   Type *Ty = CI.getType();
1115   Value *Src = CI.getArgOperand(0);
1116   Value *Amt = CI.getArgOperand(1);
1117 
1118   // Amount may be scalar immediate, in which case create a splat vector.
1119   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1120   // we only care about the lowest log2 bits anyway.
1121   if (Amt->getType() != Ty) {
1122     unsigned NumElts = Ty->getVectorNumElements();
1123     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1124     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1125   }
1126 
1127   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1128   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1129   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1130 
1131   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1132     Value *VecSrc = CI.getOperand(2);
1133     Value *Mask = CI.getOperand(3);
1134     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1135   }
1136   return Res;
1137 }
1138 
1139 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1140                               bool IsSigned) {
1141   Type *Ty = CI.getType();
1142   Value *LHS = CI.getArgOperand(0);
1143   Value *RHS = CI.getArgOperand(1);
1144 
1145   CmpInst::Predicate Pred;
1146   switch (Imm) {
1147   case 0x0:
1148     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1149     break;
1150   case 0x1:
1151     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1152     break;
1153   case 0x2:
1154     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1155     break;
1156   case 0x3:
1157     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1158     break;
1159   case 0x4:
1160     Pred = ICmpInst::ICMP_EQ;
1161     break;
1162   case 0x5:
1163     Pred = ICmpInst::ICMP_NE;
1164     break;
1165   case 0x6:
1166     return Constant::getNullValue(Ty); // FALSE
1167   case 0x7:
1168     return Constant::getAllOnesValue(Ty); // TRUE
1169   default:
1170     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1171   }
1172 
1173   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1174   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1175   return Ext;
1176 }
1177 
// Upgrade x86 concat-shift (double-shift) intrinsics to the generic fshl/fshr
// funnel-shift intrinsics, reapplying any writemask as a select.
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  // fshr takes its operands in the opposite order from fshl, so swap the two
  // data operands for the right-shift form.
  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});

  unsigned NumArgs = CI.getNumArgOperands();
  if (NumArgs >= 4) { // For masked intrinsics.
    // Passthru selection: a 5-operand call carries an explicit passthru at
    // index 3; with 4 operands, zero-masked forms pass through zero and the
    // rest pass through the first source operand. The mask is always last.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
1211 
1212 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1213                                  Value *Ptr, Value *Data, Value *Mask,
1214                                  bool Aligned) {
1215   // Cast the pointer to the right type.
1216   Ptr = Builder.CreateBitCast(Ptr,
1217                               llvm::PointerType::getUnqual(Data->getType()));
1218   unsigned Align =
1219     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
1220 
1221   // If the mask is all ones just emit a regular store.
1222   if (const auto *C = dyn_cast<Constant>(Mask))
1223     if (C->isAllOnesValue())
1224       return Builder.CreateAlignedStore(Data, Ptr, Align);
1225 
1226   // Convert the mask from an integer type to a vector of i1.
1227   unsigned NumElts = Data->getType()->getVectorNumElements();
1228   Mask = getX86MaskVec(Builder, Mask, NumElts);
1229   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
1230 }
1231 
1232 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1233                                 Value *Ptr, Value *Passthru, Value *Mask,
1234                                 bool Aligned) {
1235   Type *ValTy = Passthru->getType();
1236   // Cast the pointer to the right type.
1237   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1238   unsigned Align =
1239     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1240 
1241   // If the mask is all ones just emit a regular store.
1242   if (const auto *C = dyn_cast<Constant>(Mask))
1243     if (C->isAllOnesValue())
1244       return Builder.CreateAlignedLoad(ValTy, Ptr, Align);
1245 
1246   // Convert the mask from an integer type to a vector of i1.
1247   unsigned NumElts = Passthru->getType()->getVectorNumElements();
1248   Mask = getX86MaskVec(Builder, Mask, NumElts);
1249   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
1250 }
1251 
1252 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1253   Value *Op0 = CI.getArgOperand(0);
1254   llvm::Type *Ty = Op0->getType();
1255   Value *Zero = llvm::Constant::getNullValue(Ty);
1256   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1257   Value *Neg = Builder.CreateNeg(Op0);
1258   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1259 
1260   if (CI.getNumArgOperands() == 3)
1261     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1262 
1263   return Res;
1264 }
1265 
1266 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1267                                ICmpInst::Predicate Pred) {
1268   Value *Op0 = CI.getArgOperand(0);
1269   Value *Op1 = CI.getArgOperand(1);
1270   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1271   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1272 
1273   if (CI.getNumArgOperands() == 4)
1274     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1275 
1276   return Res;
1277 }
1278 
1279 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1280   Type *Ty = CI.getType();
1281 
1282   // Arguments have a vXi32 type so cast to vXi64.
1283   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1284   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1285 
1286   if (IsSigned) {
1287     // Shift left then arithmetic shift right.
1288     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1289     LHS = Builder.CreateShl(LHS, ShiftAmt);
1290     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1291     RHS = Builder.CreateShl(RHS, ShiftAmt);
1292     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1293   } else {
1294     // Clear the upper bits.
1295     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1296     LHS = Builder.CreateAnd(LHS, Mask);
1297     RHS = Builder.CreateAnd(RHS, Mask);
1298   }
1299 
1300   Value *Res = Builder.CreateMul(LHS, RHS);
1301 
1302   if (CI.getNumArgOperands() == 4)
1303     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1304 
1305   return Res;
1306 }
1307 
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    // Skip the AND entirely when the mask is a constant all-ones (a no-op).
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Pad the i1 vector out to 8 elements so the final bitcast yields at
    // least an i8. Shuffle indices >= NumElts select from the all-zero
    // second operand, so the padding lanes are zero.
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  // Reinterpret the (padded) i1 vector as a single integer of >= 8 bits.
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
1330 
// Upgrade masked vector compare intrinsics. CC selects the predicate; the
// result is ANDed with the writemask and returned as an integer of at least
// 8 bits (via ApplyX86MaskOn1BitsVec).
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    // Always-false predicate: no compare needed.
    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // Always-true predicate: no compare needed.
    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  // The writemask is always the last call operand.
  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
1359 
1360 // Replace a masked intrinsic with an older unmasked intrinsic.
1361 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1362                                     Intrinsic::ID IID) {
1363   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1364   Value *Rep = Builder.CreateCall(Intrin,
1365                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1366   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1367 }
1368 
1369 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1370   Value* A = CI.getArgOperand(0);
1371   Value* B = CI.getArgOperand(1);
1372   Value* Src = CI.getArgOperand(2);
1373   Value* Mask = CI.getArgOperand(3);
1374 
1375   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1376   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1377   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1378   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1379   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1380   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1381 }
1382 
1383 
1384 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1385   Value* Op = CI.getArgOperand(0);
1386   Type* ReturnOp = CI.getType();
1387   unsigned NumElts = CI.getType()->getVectorNumElements();
1388   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1389   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1390 }
1391 
// Replace intrinsic with unmasked version and a select.
//
// Handles the "llvm.x86.avx512.mask.*" family whose unmasked forms already
// exist as separate intrinsics.  The intrinsic name (with the "avx512.mask."
// prefix removed) plus the result's vector/element width selects the legacy
// unmasked intrinsic ID; the call is re-emitted without its trailing two
// operands, and the result is blended with the second-to-last operand
// (passthru) under the last operand (mask).  Returns true and sets Rep when
// the name is recognized, false otherwise.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    // permvar covers both float and integer element types; disambiguate on
    // the result type.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    // Name[9] is the element-type letter: 'd' (dword) or 'q' (qword).
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    // Name[5] is the element-type letter: 'b' (byte) or 'w' (word).
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Call the unmasked intrinsic with every operand except the trailing
  // passthru and mask.
  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  // Blend the unmasked result with the passthru operand (NumArgs - 2) under
  // the mask operand (NumArgs - 1).
  unsigned NumArgs = CI.getNumArgOperands();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
1631 
1632 /// Upgrade comment in call to inline asm that represents an objc retain release
1633 /// marker.
1634 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1635   size_t Pos;
1636   if (AsmStr->find("mov\tfp") == 0 &&
1637       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1638       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1639     AsmStr->replace(Pos, 1, ";");
1640   }
1641   return;
1642 }
1643 
1644 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1645 /// provided to seamlessly integrate with existing context.
1646 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1647   Function *F = CI->getCalledFunction();
1648   LLVMContext &C = CI->getContext();
1649   IRBuilder<> Builder(C);
1650   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1651 
1652   assert(F && "Intrinsic call is not direct?");
1653 
1654   if (!NewFn) {
1655     // Get the Function's name.
1656     StringRef Name = F->getName();
1657 
1658     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1659     Name = Name.substr(5);
1660 
1661     bool IsX86 = Name.startswith("x86.");
1662     if (IsX86)
1663       Name = Name.substr(4);
1664     bool IsNVVM = Name.startswith("nvvm.");
1665     if (IsNVVM)
1666       Name = Name.substr(5);
1667 
1668     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1669       Module *M = F->getParent();
1670       SmallVector<Metadata *, 1> Elts;
1671       Elts.push_back(
1672           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1673       MDNode *Node = MDNode::get(C, Elts);
1674 
1675       Value *Arg0 = CI->getArgOperand(0);
1676       Value *Arg1 = CI->getArgOperand(1);
1677 
1678       // Nontemporal (unaligned) store of the 0'th element of the float/double
1679       // vector.
1680       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1681       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1682       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1683       Value *Extract =
1684           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1685 
1686       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1687       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1688 
1689       // Remove intrinsic.
1690       CI->eraseFromParent();
1691       return;
1692     }
1693 
1694     if (IsX86 && (Name.startswith("avx.movnt.") ||
1695                   Name.startswith("avx512.storent."))) {
1696       Module *M = F->getParent();
1697       SmallVector<Metadata *, 1> Elts;
1698       Elts.push_back(
1699           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1700       MDNode *Node = MDNode::get(C, Elts);
1701 
1702       Value *Arg0 = CI->getArgOperand(0);
1703       Value *Arg1 = CI->getArgOperand(1);
1704 
1705       // Convert the type of the pointer to a pointer to the stored type.
1706       Value *BC = Builder.CreateBitCast(Arg0,
1707                                         PointerType::getUnqual(Arg1->getType()),
1708                                         "cast");
1709       VectorType *VTy = cast<VectorType>(Arg1->getType());
1710       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1711                                                  VTy->getBitWidth() / 8);
1712       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1713 
1714       // Remove intrinsic.
1715       CI->eraseFromParent();
1716       return;
1717     }
1718 
1719     if (IsX86 && Name == "sse2.storel.dq") {
1720       Value *Arg0 = CI->getArgOperand(0);
1721       Value *Arg1 = CI->getArgOperand(1);
1722 
1723       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1724       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1725       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1726       Value *BC = Builder.CreateBitCast(Arg0,
1727                                         PointerType::getUnqual(Elt->getType()),
1728                                         "cast");
1729       Builder.CreateAlignedStore(Elt, BC, 1);
1730 
1731       // Remove intrinsic.
1732       CI->eraseFromParent();
1733       return;
1734     }
1735 
1736     if (IsX86 && (Name.startswith("sse.storeu.") ||
1737                   Name.startswith("sse2.storeu.") ||
1738                   Name.startswith("avx.storeu."))) {
1739       Value *Arg0 = CI->getArgOperand(0);
1740       Value *Arg1 = CI->getArgOperand(1);
1741 
1742       Arg0 = Builder.CreateBitCast(Arg0,
1743                                    PointerType::getUnqual(Arg1->getType()),
1744                                    "cast");
1745       Builder.CreateAlignedStore(Arg1, Arg0, 1);
1746 
1747       // Remove intrinsic.
1748       CI->eraseFromParent();
1749       return;
1750     }
1751 
1752     if (IsX86 && Name == "avx512.mask.store.ss") {
1753       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1754       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1755                          Mask, false);
1756 
1757       // Remove intrinsic.
1758       CI->eraseFromParent();
1759       return;
1760     }
1761 
1762     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1763       // "avx512.mask.storeu." or "avx512.mask.store."
1764       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1765       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1766                          CI->getArgOperand(2), Aligned);
1767 
1768       // Remove intrinsic.
1769       CI->eraseFromParent();
1770       return;
1771     }
1772 
1773     Value *Rep;
1774     // Upgrade packed integer vector compare intrinsics to compare instructions.
1775     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1776                   Name.startswith("avx2.pcmp"))) {
1777       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1778       bool CmpEq = Name[9] == 'e';
1779       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1780                                CI->getArgOperand(0), CI->getArgOperand(1));
1781       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1782     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1783       Type *ExtTy = Type::getInt32Ty(C);
1784       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1785         ExtTy = Type::getInt64Ty(C);
1786       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1787                          ExtTy->getPrimitiveSizeInBits();
1788       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1789       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1790     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1791                          Name == "sse2.sqrt.sd")) {
1792       Value *Vec = CI->getArgOperand(0);
1793       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1794       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1795                                                  Intrinsic::sqrt, Elt0->getType());
1796       Elt0 = Builder.CreateCall(Intr, Elt0);
1797       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1798     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1799                          Name.startswith("sse2.sqrt.p") ||
1800                          Name.startswith("sse.sqrt.p"))) {
1801       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1802                                                          Intrinsic::sqrt,
1803                                                          CI->getType()),
1804                                {CI->getArgOperand(0)});
1805     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1806       if (CI->getNumArgOperands() == 4 &&
1807           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1808            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1809         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1810                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1811 
1812         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1813         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1814                                                            IID), Args);
1815       } else {
1816         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1817                                                            Intrinsic::sqrt,
1818                                                            CI->getType()),
1819                                  {CI->getArgOperand(0)});
1820       }
1821       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1822                           CI->getArgOperand(1));
1823     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1824                          Name.startswith("avx512.ptestnm"))) {
1825       Value *Op0 = CI->getArgOperand(0);
1826       Value *Op1 = CI->getArgOperand(1);
1827       Value *Mask = CI->getArgOperand(2);
1828       Rep = Builder.CreateAnd(Op0, Op1);
1829       llvm::Type *Ty = Op0->getType();
1830       Value *Zero = llvm::Constant::getNullValue(Ty);
1831       ICmpInst::Predicate Pred =
1832         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1833       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1834       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1835     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1836       unsigned NumElts =
1837           CI->getArgOperand(1)->getType()->getVectorNumElements();
1838       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1839       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1840                           CI->getArgOperand(1));
1841     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1842       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1843       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1844       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1845       uint32_t Indices[64];
1846       for (unsigned i = 0; i != NumElts; ++i)
1847         Indices[i] = i;
1848 
1849       // First extract half of each vector. This gives better codegen than
1850       // doing it in a single shuffle.
1851       LHS = Builder.CreateShuffleVector(LHS, LHS,
1852                                         makeArrayRef(Indices, NumElts / 2));
1853       RHS = Builder.CreateShuffleVector(RHS, RHS,
1854                                         makeArrayRef(Indices, NumElts / 2));
1855       // Concat the vectors.
1856       // NOTE: Operands have to be swapped to match intrinsic definition.
1857       Rep = Builder.CreateShuffleVector(RHS, LHS,
1858                                         makeArrayRef(Indices, NumElts));
1859       Rep = Builder.CreateBitCast(Rep, CI->getType());
1860     } else if (IsX86 && Name == "avx512.kand.w") {
1861       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1862       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1863       Rep = Builder.CreateAnd(LHS, RHS);
1864       Rep = Builder.CreateBitCast(Rep, CI->getType());
1865     } else if (IsX86 && Name == "avx512.kandn.w") {
1866       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1867       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1868       LHS = Builder.CreateNot(LHS);
1869       Rep = Builder.CreateAnd(LHS, RHS);
1870       Rep = Builder.CreateBitCast(Rep, CI->getType());
1871     } else if (IsX86 && Name == "avx512.kor.w") {
1872       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1873       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1874       Rep = Builder.CreateOr(LHS, RHS);
1875       Rep = Builder.CreateBitCast(Rep, CI->getType());
1876     } else if (IsX86 && Name == "avx512.kxor.w") {
1877       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1878       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1879       Rep = Builder.CreateXor(LHS, RHS);
1880       Rep = Builder.CreateBitCast(Rep, CI->getType());
1881     } else if (IsX86 && Name == "avx512.kxnor.w") {
1882       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1883       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1884       LHS = Builder.CreateNot(LHS);
1885       Rep = Builder.CreateXor(LHS, RHS);
1886       Rep = Builder.CreateBitCast(Rep, CI->getType());
1887     } else if (IsX86 && Name == "avx512.knot.w") {
1888       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1889       Rep = Builder.CreateNot(Rep);
1890       Rep = Builder.CreateBitCast(Rep, CI->getType());
1891     } else if (IsX86 &&
1892                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1893       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1894       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1895       Rep = Builder.CreateOr(LHS, RHS);
1896       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1897       Value *C;
1898       if (Name[14] == 'c')
1899         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1900       else
1901         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1902       Rep = Builder.CreateICmpEQ(Rep, C);
1903       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1904     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1905                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1906                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1907                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1908       Type *I32Ty = Type::getInt32Ty(C);
1909       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1910                                                  ConstantInt::get(I32Ty, 0));
1911       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1912                                                  ConstantInt::get(I32Ty, 0));
1913       Value *EltOp;
1914       if (Name.contains(".add."))
1915         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1916       else if (Name.contains(".sub."))
1917         EltOp = Builder.CreateFSub(Elt0, Elt1);
1918       else if (Name.contains(".mul."))
1919         EltOp = Builder.CreateFMul(Elt0, Elt1);
1920       else
1921         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1922       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1923                                         ConstantInt::get(I32Ty, 0));
1924     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1925       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1926       bool CmpEq = Name[16] == 'e';
1927       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1928     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1929       Type *OpTy = CI->getArgOperand(0)->getType();
1930       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1931       Intrinsic::ID IID;
1932       switch (VecWidth) {
1933       default: llvm_unreachable("Unexpected intrinsic");
1934       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1935       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1936       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1937       }
1938 
1939       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1940                                { CI->getOperand(0), CI->getArgOperand(1) });
1941       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1942     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1943       Type *OpTy = CI->getArgOperand(0)->getType();
1944       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1945       unsigned EltWidth = OpTy->getScalarSizeInBits();
1946       Intrinsic::ID IID;
1947       if (VecWidth == 128 && EltWidth == 32)
1948         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1949       else if (VecWidth == 256 && EltWidth == 32)
1950         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1951       else if (VecWidth == 512 && EltWidth == 32)
1952         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1953       else if (VecWidth == 128 && EltWidth == 64)
1954         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1955       else if (VecWidth == 256 && EltWidth == 64)
1956         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1957       else if (VecWidth == 512 && EltWidth == 64)
1958         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1959       else
1960         llvm_unreachable("Unexpected intrinsic");
1961 
1962       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1963                                { CI->getOperand(0), CI->getArgOperand(1) });
1964       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1965     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1966       Type *OpTy = CI->getArgOperand(0)->getType();
1967       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1968       unsigned EltWidth = OpTy->getScalarSizeInBits();
1969       Intrinsic::ID IID;
1970       if (VecWidth == 128 && EltWidth == 32)
1971         IID = Intrinsic::x86_avx512_cmp_ps_128;
1972       else if (VecWidth == 256 && EltWidth == 32)
1973         IID = Intrinsic::x86_avx512_cmp_ps_256;
1974       else if (VecWidth == 512 && EltWidth == 32)
1975         IID = Intrinsic::x86_avx512_cmp_ps_512;
1976       else if (VecWidth == 128 && EltWidth == 64)
1977         IID = Intrinsic::x86_avx512_cmp_pd_128;
1978       else if (VecWidth == 256 && EltWidth == 64)
1979         IID = Intrinsic::x86_avx512_cmp_pd_256;
1980       else if (VecWidth == 512 && EltWidth == 64)
1981         IID = Intrinsic::x86_avx512_cmp_pd_512;
1982       else
1983         llvm_unreachable("Unexpected intrinsic");
1984 
1985       SmallVector<Value *, 4> Args;
1986       Args.push_back(CI->getArgOperand(0));
1987       Args.push_back(CI->getArgOperand(1));
1988       Args.push_back(CI->getArgOperand(2));
1989       if (CI->getNumArgOperands() == 5)
1990         Args.push_back(CI->getArgOperand(4));
1991 
1992       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1993                                Args);
1994       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1995     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1996                Name[16] != 'p') {
1997       // Integer compare intrinsics.
1998       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1999       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2000     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2001       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2002       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2003     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2004                          Name.startswith("avx512.cvtw2mask.") ||
2005                          Name.startswith("avx512.cvtd2mask.") ||
2006                          Name.startswith("avx512.cvtq2mask."))) {
2007       Value *Op = CI->getArgOperand(0);
2008       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2009       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2010       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2011     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2012                         Name == "ssse3.pabs.w.128" ||
2013                         Name == "ssse3.pabs.d.128" ||
2014                         Name.startswith("avx2.pabs") ||
2015                         Name.startswith("avx512.mask.pabs"))) {
2016       Rep = upgradeAbs(Builder, *CI);
2017     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2018                          Name == "sse2.pmaxs.w" ||
2019                          Name == "sse41.pmaxsd" ||
2020                          Name.startswith("avx2.pmaxs") ||
2021                          Name.startswith("avx512.mask.pmaxs"))) {
2022       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
2023     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2024                          Name == "sse41.pmaxuw" ||
2025                          Name == "sse41.pmaxud" ||
2026                          Name.startswith("avx2.pmaxu") ||
2027                          Name.startswith("avx512.mask.pmaxu"))) {
2028       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
2029     } else if (IsX86 && (Name == "sse41.pminsb" ||
2030                          Name == "sse2.pmins.w" ||
2031                          Name == "sse41.pminsd" ||
2032                          Name.startswith("avx2.pmins") ||
2033                          Name.startswith("avx512.mask.pmins"))) {
2034       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
2035     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2036                          Name == "sse41.pminuw" ||
2037                          Name == "sse41.pminud" ||
2038                          Name.startswith("avx2.pminu") ||
2039                          Name.startswith("avx512.mask.pminu"))) {
2040       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
2041     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2042                          Name == "avx2.pmulu.dq" ||
2043                          Name == "avx512.pmulu.dq.512" ||
2044                          Name.startswith("avx512.mask.pmulu.dq."))) {
2045       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2046     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2047                          Name == "avx2.pmul.dq" ||
2048                          Name == "avx512.pmul.dq.512" ||
2049                          Name.startswith("avx512.mask.pmul.dq."))) {
2050       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2051     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2052                          Name == "sse2.cvtsi2sd" ||
2053                          Name == "sse.cvtsi642ss" ||
2054                          Name == "sse2.cvtsi642sd")) {
2055       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
2056                                  CI->getType()->getVectorElementType());
2057       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2058     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2059       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
2060                                  CI->getType()->getVectorElementType());
2061       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2062     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2063       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2064       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
2065       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2066     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2067                          Name == "sse2.cvtdq2ps" ||
2068                          Name == "avx.cvtdq2.pd.256" ||
2069                          Name == "avx.cvtdq2.ps.256" ||
2070                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2071                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2072                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2073                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2074                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2075                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2076                          Name == "avx512.mask.cvtqq2ps.256" ||
2077                          Name == "avx512.mask.cvtqq2ps.512" ||
2078                          Name == "avx512.mask.cvtuqq2ps.256" ||
2079                          Name == "avx512.mask.cvtuqq2ps.512" ||
2080                          Name == "sse2.cvtps2pd" ||
2081                          Name == "avx.cvt.ps2.pd.256" ||
2082                          Name == "avx512.mask.cvtps2pd.128" ||
2083                          Name == "avx512.mask.cvtps2pd.256")) {
2084       Type *DstTy = CI->getType();
2085       Rep = CI->getArgOperand(0);
2086       Type *SrcTy = Rep->getType();
2087 
2088       unsigned NumDstElts = DstTy->getVectorNumElements();
2089       if (NumDstElts < SrcTy->getVectorNumElements()) {
2090         assert(NumDstElts == 2 && "Unexpected vector size");
2091         uint32_t ShuffleMask[2] = { 0, 1 };
2092         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
2093       }
2094 
2095       bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
2096       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2097       if (IsPS2PD)
2098         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2099       else if (CI->getNumArgOperands() == 4 &&
2100                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2101                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2102         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2103                                        : Intrinsic::x86_avx512_sitofp_round;
2104         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2105                                                 { DstTy, SrcTy });
2106         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2107       } else {
2108         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2109                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2110       }
2111 
2112       if (CI->getNumArgOperands() >= 3)
2113         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2114                             CI->getArgOperand(1));
2115     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2116       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2117                               CI->getArgOperand(1), CI->getArgOperand(2),
2118                               /*Aligned*/false);
2119     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2120       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2121                               CI->getArgOperand(1),CI->getArgOperand(2),
2122                               /*Aligned*/true);
2123     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2124       Type *ResultTy = CI->getType();
2125       Type *PtrTy = ResultTy->getVectorElementType();
2126 
2127       // Cast the pointer to element type.
2128       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2129                                          llvm::PointerType::getUnqual(PtrTy));
2130 
2131       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2132                                      ResultTy->getVectorNumElements());
2133 
2134       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2135                                                 Intrinsic::masked_expandload,
2136                                                 ResultTy);
2137       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2138     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2139       Type *ResultTy = CI->getArgOperand(1)->getType();
2140       Type *PtrTy = ResultTy->getVectorElementType();
2141 
2142       // Cast the pointer to element type.
2143       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2144                                          llvm::PointerType::getUnqual(PtrTy));
2145 
2146       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2147                                      ResultTy->getVectorNumElements());
2148 
2149       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2150                                                 Intrinsic::masked_compressstore,
2151                                                 ResultTy);
2152       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2153     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2154                          Name.startswith("avx512.mask.expand."))) {
2155       Type *ResultTy = CI->getType();
2156 
2157       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2158                                      ResultTy->getVectorNumElements());
2159 
2160       bool IsCompress = Name[12] == 'c';
2161       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2162                                      : Intrinsic::x86_avx512_mask_expand;
2163       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2164       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2165                                        MaskVec });
2166     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2167       bool IsSigned;
2168       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2169           Name.endswith("uq"))
2170         IsSigned = false;
2171       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2172                Name.endswith("q"))
2173         IsSigned = true;
2174       else
2175         llvm_unreachable("Unknown suffix");
2176 
2177       unsigned Imm;
2178       if (CI->getNumArgOperands() == 3) {
2179         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2180       } else {
2181         Name = Name.substr(9); // strip off "xop.vpcom"
2182         if (Name.startswith("lt"))
2183           Imm = 0;
2184         else if (Name.startswith("le"))
2185           Imm = 1;
2186         else if (Name.startswith("gt"))
2187           Imm = 2;
2188         else if (Name.startswith("ge"))
2189           Imm = 3;
2190         else if (Name.startswith("eq"))
2191           Imm = 4;
2192         else if (Name.startswith("ne"))
2193           Imm = 5;
2194         else if (Name.startswith("false"))
2195           Imm = 6;
2196         else if (Name.startswith("true"))
2197           Imm = 7;
2198         else
2199           llvm_unreachable("Unknown condition");
2200       }
2201 
2202       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2203     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2204       Value *Sel = CI->getArgOperand(2);
2205       Value *NotSel = Builder.CreateNot(Sel);
2206       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2207       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2208       Rep = Builder.CreateOr(Sel0, Sel1);
2209     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2210                          Name.startswith("avx512.prol") ||
2211                          Name.startswith("avx512.mask.prol"))) {
2212       Rep = upgradeX86Rotate(Builder, *CI, false);
2213     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2214                          Name.startswith("avx512.mask.pror"))) {
2215       Rep = upgradeX86Rotate(Builder, *CI, true);
2216     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2217                          Name.startswith("avx512.mask.vpshld") ||
2218                          Name.startswith("avx512.maskz.vpshld"))) {
2219       bool ZeroMask = Name[11] == 'z';
2220       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2221     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2222                          Name.startswith("avx512.mask.vpshrd") ||
2223                          Name.startswith("avx512.maskz.vpshrd"))) {
2224       bool ZeroMask = Name[11] == 'z';
2225       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2226     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2227       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2228                                                Intrinsic::x86_sse42_crc32_32_8);
2229       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2230       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2231       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2232     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2233                          Name.startswith("avx512.vbroadcast.s"))) {
2234       // Replace broadcasts with a series of insertelements.
2235       Type *VecTy = CI->getType();
2236       Type *EltTy = VecTy->getVectorElementType();
2237       unsigned EltNum = VecTy->getVectorNumElements();
2238       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2239                                           EltTy->getPointerTo());
2240       Value *Load = Builder.CreateLoad(EltTy, Cast);
2241       Type *I32Ty = Type::getInt32Ty(C);
2242       Rep = UndefValue::get(VecTy);
2243       for (unsigned I = 0; I < EltNum; ++I)
2244         Rep = Builder.CreateInsertElement(Rep, Load,
2245                                           ConstantInt::get(I32Ty, I));
2246     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2247                          Name.startswith("sse41.pmovzx") ||
2248                          Name.startswith("avx2.pmovsx") ||
2249                          Name.startswith("avx2.pmovzx") ||
2250                          Name.startswith("avx512.mask.pmovsx") ||
2251                          Name.startswith("avx512.mask.pmovzx"))) {
2252       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2253       VectorType *DstTy = cast<VectorType>(CI->getType());
2254       unsigned NumDstElts = DstTy->getNumElements();
2255 
2256       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2257       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2258       for (unsigned i = 0; i != NumDstElts; ++i)
2259         ShuffleMask[i] = i;
2260 
2261       Value *SV = Builder.CreateShuffleVector(
2262           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2263 
2264       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2265       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2266                    : Builder.CreateZExt(SV, DstTy);
2267       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2268       if (CI->getNumArgOperands() == 3)
2269         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2270                             CI->getArgOperand(1));
2271     } else if (Name == "avx512.mask.pmov.qd.256" ||
2272                Name == "avx512.mask.pmov.qd.512" ||
2273                Name == "avx512.mask.pmov.wb.256" ||
2274                Name == "avx512.mask.pmov.wb.512") {
2275       Type *Ty = CI->getArgOperand(1)->getType();
2276       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2277       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2278                           CI->getArgOperand(1));
2279     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2280                          Name == "avx2.vbroadcasti128")) {
2281       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2282       Type *EltTy = CI->getType()->getVectorElementType();
2283       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2284       Type *VT = VectorType::get(EltTy, NumSrcElts);
2285       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2286                                             PointerType::getUnqual(VT));
2287       Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
2288       if (NumSrcElts == 2)
2289         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2290                                           { 0, 1, 0, 1 });
2291       else
2292         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2293                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
2294     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2295                          Name.startswith("avx512.mask.shuf.f"))) {
2296       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2297       Type *VT = CI->getType();
2298       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2299       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2300       unsigned ControlBitsMask = NumLanes - 1;
2301       unsigned NumControlBits = NumLanes / 2;
2302       SmallVector<uint32_t, 8> ShuffleMask(0);
2303 
2304       for (unsigned l = 0; l != NumLanes; ++l) {
2305         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2306         // We actually need the other source.
2307         if (l >= NumLanes / 2)
2308           LaneMask += NumLanes;
2309         for (unsigned i = 0; i != NumElementsInLane; ++i)
2310           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2311       }
2312       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2313                                         CI->getArgOperand(1), ShuffleMask);
2314       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2315                           CI->getArgOperand(3));
2316     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2317                          Name.startswith("avx512.mask.broadcasti"))) {
2318       unsigned NumSrcElts =
2319                         CI->getArgOperand(0)->getType()->getVectorNumElements();
2320       unsigned NumDstElts = CI->getType()->getVectorNumElements();
2321 
2322       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2323       for (unsigned i = 0; i != NumDstElts; ++i)
2324         ShuffleMask[i] = i % NumSrcElts;
2325 
2326       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2327                                         CI->getArgOperand(0),
2328                                         ShuffleMask);
2329       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2330                           CI->getArgOperand(1));
2331     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2332                          Name.startswith("avx2.vbroadcast") ||
2333                          Name.startswith("avx512.pbroadcast") ||
2334                          Name.startswith("avx512.mask.broadcast.s"))) {
2335       // Replace vp?broadcasts with a vector shuffle.
2336       Value *Op = CI->getArgOperand(0);
2337       unsigned NumElts = CI->getType()->getVectorNumElements();
2338       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2339       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2340                                         Constant::getNullValue(MaskTy));
2341 
2342       if (CI->getNumArgOperands() == 3)
2343         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2344                             CI->getArgOperand(1));
2345     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2346                          Name.startswith("sse2.psubs.") ||
2347                          Name.startswith("avx2.padds.") ||
2348                          Name.startswith("avx2.psubs.") ||
2349                          Name.startswith("avx512.padds.") ||
2350                          Name.startswith("avx512.psubs.") ||
2351                          Name.startswith("avx512.mask.padds.") ||
2352                          Name.startswith("avx512.mask.psubs."))) {
2353       bool IsAdd = Name.contains(".padds");
2354       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2355     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2356                          Name.startswith("sse2.psubus.") ||
2357                          Name.startswith("avx2.paddus.") ||
2358                          Name.startswith("avx2.psubus.") ||
2359                          Name.startswith("avx512.mask.paddus.") ||
2360                          Name.startswith("avx512.mask.psubus."))) {
2361       bool IsAdd = Name.contains(".paddus");
2362       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2363     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2364       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2365                                       CI->getArgOperand(1),
2366                                       CI->getArgOperand(2),
2367                                       CI->getArgOperand(3),
2368                                       CI->getArgOperand(4),
2369                                       false);
2370     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2371       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2372                                       CI->getArgOperand(1),
2373                                       CI->getArgOperand(2),
2374                                       CI->getArgOperand(3),
2375                                       CI->getArgOperand(4),
2376                                       true);
2377     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2378                          Name == "avx2.psll.dq")) {
2379       // 128/256-bit shift left specified in bits.
2380       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2381       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2382                                        Shift / 8); // Shift is in bits.
2383     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2384                          Name == "avx2.psrl.dq")) {
2385       // 128/256-bit shift right specified in bits.
2386       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2387       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2388                                        Shift / 8); // Shift is in bits.
2389     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2390                          Name == "avx2.psll.dq.bs" ||
2391                          Name == "avx512.psll.dq.512")) {
2392       // 128/256/512-bit shift left specified in bytes.
2393       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2394       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2395     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2396                          Name == "avx2.psrl.dq.bs" ||
2397                          Name == "avx512.psrl.dq.512")) {
2398       // 128/256/512-bit shift right specified in bytes.
2399       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2400       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2401     } else if (IsX86 && (Name == "sse41.pblendw" ||
2402                          Name.startswith("sse41.blendp") ||
2403                          Name.startswith("avx.blend.p") ||
2404                          Name == "avx2.pblendw" ||
2405                          Name.startswith("avx2.pblendd."))) {
2406       Value *Op0 = CI->getArgOperand(0);
2407       Value *Op1 = CI->getArgOperand(1);
2408       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2409       VectorType *VecTy = cast<VectorType>(CI->getType());
2410       unsigned NumElts = VecTy->getNumElements();
2411 
2412       SmallVector<uint32_t, 16> Idxs(NumElts);
2413       for (unsigned i = 0; i != NumElts; ++i)
2414         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2415 
2416       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2417     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2418                          Name == "avx2.vinserti128" ||
2419                          Name.startswith("avx512.mask.insert"))) {
2420       Value *Op0 = CI->getArgOperand(0);
2421       Value *Op1 = CI->getArgOperand(1);
2422       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2423       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2424       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2425       unsigned Scale = DstNumElts / SrcNumElts;
2426 
2427       // Mask off the high bits of the immediate value; hardware ignores those.
2428       Imm = Imm % Scale;
2429 
2430       // Extend the second operand into a vector the size of the destination.
2431       Value *UndefV = UndefValue::get(Op1->getType());
2432       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2433       for (unsigned i = 0; i != SrcNumElts; ++i)
2434         Idxs[i] = i;
2435       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2436         Idxs[i] = SrcNumElts;
2437       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2438 
2439       // Insert the second operand into the first operand.
2440 
2441       // Note that there is no guarantee that instruction lowering will actually
2442       // produce a vinsertf128 instruction for the created shuffles. In
2443       // particular, the 0 immediate case involves no lane changes, so it can
2444       // be handled as a blend.
2445 
2446       // Example of shuffle mask for 32-bit elements:
2447       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2448       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2449 
2450       // First fill with identify mask.
2451       for (unsigned i = 0; i != DstNumElts; ++i)
2452         Idxs[i] = i;
2453       // Then replace the elements where we need to insert.
2454       for (unsigned i = 0; i != SrcNumElts; ++i)
2455         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2456       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2457 
2458       // If the intrinsic has a mask operand, handle that.
2459       if (CI->getNumArgOperands() == 5)
2460         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2461                             CI->getArgOperand(3));
2462     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2463                          Name == "avx2.vextracti128" ||
2464                          Name.startswith("avx512.mask.vextract"))) {
2465       Value *Op0 = CI->getArgOperand(0);
2466       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2467       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2468       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2469       unsigned Scale = SrcNumElts / DstNumElts;
2470 
2471       // Mask off the high bits of the immediate value; hardware ignores those.
2472       Imm = Imm % Scale;
2473 
2474       // Get indexes for the subvector of the input vector.
2475       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2476       for (unsigned i = 0; i != DstNumElts; ++i) {
2477         Idxs[i] = i + (Imm * DstNumElts);
2478       }
2479       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2480 
2481       // If the intrinsic has a mask operand, handle that.
2482       if (CI->getNumArgOperands() == 4)
2483         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2484                             CI->getArgOperand(2));
2485     } else if (!IsX86 && Name == "stackprotectorcheck") {
2486       Rep = nullptr;
2487     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2488                          Name.startswith("avx512.mask.perm.di."))) {
2489       Value *Op0 = CI->getArgOperand(0);
2490       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2491       VectorType *VecTy = cast<VectorType>(CI->getType());
2492       unsigned NumElts = VecTy->getNumElements();
2493 
2494       SmallVector<uint32_t, 8> Idxs(NumElts);
2495       for (unsigned i = 0; i != NumElts; ++i)
2496         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2497 
2498       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2499 
2500       if (CI->getNumArgOperands() == 4)
2501         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2502                             CI->getArgOperand(2));
2503     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2504                          Name == "avx2.vperm2i128")) {
2505       // The immediate permute control byte looks like this:
2506       //    [1:0] - select 128 bits from sources for low half of destination
2507       //    [2]   - ignore
2508       //    [3]   - zero low half of destination
2509       //    [5:4] - select 128 bits from sources for high half of destination
2510       //    [6]   - ignore
2511       //    [7]   - zero high half of destination
2512 
2513       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2514 
2515       unsigned NumElts = CI->getType()->getVectorNumElements();
2516       unsigned HalfSize = NumElts / 2;
2517       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2518 
2519       // Determine which operand(s) are actually in use for this instruction.
2520       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2521       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2522 
2523       // If needed, replace operands based on zero mask.
2524       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2525       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2526 
2527       // Permute low half of result.
2528       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2529       for (unsigned i = 0; i < HalfSize; ++i)
2530         ShuffleMask[i] = StartIndex + i;
2531 
2532       // Permute high half of result.
2533       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2534       for (unsigned i = 0; i < HalfSize; ++i)
2535         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2536 
2537       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2538 
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      // Lower VPERMILPS/VPERMILPD/PSHUFD with an immediate control byte to a
      // plain shufflevector with a constant mask.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate: one bit per index
      // for 64-bit elements, two bits per index for 32-bit elements.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // The avx512.mask.* forms carry a passthru (arg 2) and a mask (arg 3);
      // fold them in with a select.
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
2563     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2564                          Name.startswith("avx512.mask.pshufl.w."))) {
2565       Value *Op0 = CI->getArgOperand(0);
2566       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2567       unsigned NumElts = CI->getType()->getVectorNumElements();
2568 
2569       SmallVector<uint32_t, 16> Idxs(NumElts);
2570       for (unsigned l = 0; l != NumElts; l += 8) {
2571         for (unsigned i = 0; i != 4; ++i)
2572           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2573         for (unsigned i = 4; i != 8; ++i)
2574           Idxs[i + l] = i + l;
2575       }
2576 
2577       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2578 
2579       if (CI->getNumArgOperands() == 4)
2580         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2581                             CI->getArgOperand(2));
2582     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2583                          Name.startswith("avx512.mask.pshufh.w."))) {
2584       Value *Op0 = CI->getArgOperand(0);
2585       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2586       unsigned NumElts = CI->getType()->getVectorNumElements();
2587 
2588       SmallVector<uint32_t, 16> Idxs(NumElts);
2589       for (unsigned l = 0; l != NumElts; l += 8) {
2590         for (unsigned i = 0; i != 4; ++i)
2591           Idxs[i + l] = i + l;
2592         for (unsigned i = 0; i != 4; ++i)
2593           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2594       }
2595 
2596       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2597 
2598       if (CI->getNumArgOperands() == 4)
2599         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2600                             CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      // Lower masked SHUFPS/SHUFPD to a two-source shufflevector followed by
      // a masking select. The shuffle operates independently on each 128-bit
      // lane of the vector.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source
        // (shuffle-mask indices >= NumElts refer to Op1).
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element. By adding HalfLaneElts bits from
        // the immediate. Wrapping around the immediate every 8-bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      // Arg 4 is the mask, arg 3 the passthru value.
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
2626     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2627                          Name.startswith("avx512.mask.movshdup") ||
2628                          Name.startswith("avx512.mask.movsldup"))) {
2629       Value *Op0 = CI->getArgOperand(0);
2630       unsigned NumElts = CI->getType()->getVectorNumElements();
2631       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2632 
2633       unsigned Offset = 0;
2634       if (Name.startswith("avx512.mask.movshdup."))
2635         Offset = 1;
2636 
2637       SmallVector<uint32_t, 16> Idxs(NumElts);
2638       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2639         for (unsigned i = 0; i != NumLaneElts; i += 2) {
2640           Idxs[i + l + 0] = i + l + Offset;
2641           Idxs[i + l + 1] = i + l + Offset;
2642         }
2643 
2644       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2645 
2646       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2647                           CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      // Lower masked PUNPCKL*/UNPCKL* to a shufflevector interleaving the low
      // half of each 128-bit lane of the two sources, then apply the mask.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      // Within each lane, even result elements come from Op0 and odd result
      // elements from Op1 (indices >= NumElts select from Op1).
      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      // Arg 3 is the mask, arg 2 the passthru value.
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      // Lower masked PUNPCKH*/UNPCKH* to a shufflevector interleaving the
      // high half of each 128-bit lane of the two sources, then apply the
      // mask. Identical to the unpckl case above except for the extra
      // (NumLaneElts / 2) offset that starts at the lane's upper half.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      // Arg 3 is the mask, arg 2 the passthru value.
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
2680     } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2681                          Name.startswith("avx512.mask.pand."))) {
2682       VectorType *FTy = cast<VectorType>(CI->getType());
2683       VectorType *ITy = VectorType::getInteger(FTy);
2684       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2685                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2686       Rep = Builder.CreateBitCast(Rep, FTy);
2687       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2688                           CI->getArgOperand(2));
2689     } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2690                          Name.startswith("avx512.mask.pandn."))) {
2691       VectorType *FTy = cast<VectorType>(CI->getType());
2692       VectorType *ITy = VectorType::getInteger(FTy);
2693       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2694       Rep = Builder.CreateAnd(Rep,
2695                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2696       Rep = Builder.CreateBitCast(Rep, FTy);
2697       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2698                           CI->getArgOperand(2));
2699     } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2700                          Name.startswith("avx512.mask.por."))) {
2701       VectorType *FTy = cast<VectorType>(CI->getType());
2702       VectorType *ITy = VectorType::getInteger(FTy);
2703       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2704                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2705       Rep = Builder.CreateBitCast(Rep, FTy);
2706       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2707                           CI->getArgOperand(2));
2708     } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2709                          Name.startswith("avx512.mask.pxor."))) {
2710       VectorType *FTy = cast<VectorType>(CI->getType());
2711       VectorType *ITy = VectorType::getInteger(FTy);
2712       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2713                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2714       Rep = Builder.CreateBitCast(Rep, FTy);
2715       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2716                           CI->getArgOperand(2));
2717     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2718       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2719       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2720                           CI->getArgOperand(2));
2721     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2722       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2723       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2724                           CI->getArgOperand(2));
2725     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2726       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2727       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2728                           CI->getArgOperand(2));
2729     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2730       if (Name.endswith(".512")) {
2731         Intrinsic::ID IID;
2732         if (Name[17] == 's')
2733           IID = Intrinsic::x86_avx512_add_ps_512;
2734         else
2735           IID = Intrinsic::x86_avx512_add_pd_512;
2736 
2737         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2738                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2739                                    CI->getArgOperand(4) });
2740       } else {
2741         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2742       }
2743       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2744                           CI->getArgOperand(2));
2745     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2746       if (Name.endswith(".512")) {
2747         Intrinsic::ID IID;
2748         if (Name[17] == 's')
2749           IID = Intrinsic::x86_avx512_div_ps_512;
2750         else
2751           IID = Intrinsic::x86_avx512_div_pd_512;
2752 
2753         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2754                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2755                                    CI->getArgOperand(4) });
2756       } else {
2757         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2758       }
2759       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2760                           CI->getArgOperand(2));
2761     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2762       if (Name.endswith(".512")) {
2763         Intrinsic::ID IID;
2764         if (Name[17] == 's')
2765           IID = Intrinsic::x86_avx512_mul_ps_512;
2766         else
2767           IID = Intrinsic::x86_avx512_mul_pd_512;
2768 
2769         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2770                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2771                                    CI->getArgOperand(4) });
2772       } else {
2773         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2774       }
2775       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2776                           CI->getArgOperand(2));
2777     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2778       if (Name.endswith(".512")) {
2779         Intrinsic::ID IID;
2780         if (Name[17] == 's')
2781           IID = Intrinsic::x86_avx512_sub_ps_512;
2782         else
2783           IID = Intrinsic::x86_avx512_sub_pd_512;
2784 
2785         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2786                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2787                                    CI->getArgOperand(4) });
2788       } else {
2789         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2790       }
2791       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2792                           CI->getArgOperand(2));
2793     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2794                          Name.startswith("avx512.mask.min.p")) &&
2795                Name.drop_front(18) == ".512") {
2796       bool IsDouble = Name[17] == 'd';
2797       bool IsMin = Name[13] == 'i';
2798       static const Intrinsic::ID MinMaxTbl[2][2] = {
2799         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2800         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2801       };
2802       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2803 
2804       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2805                                { CI->getArgOperand(0), CI->getArgOperand(1),
2806                                  CI->getArgOperand(4) });
2807       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2808                           CI->getArgOperand(2));
2809     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2810       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2811                                                          Intrinsic::ctlz,
2812                                                          CI->getType()),
2813                                { CI->getArgOperand(0), Builder.getInt1(false) });
2814       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2815                           CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      // Masked left-shift intrinsics. The operation shape is decoded from
      // the name: "avx512.mask.psll" is 16 characters, so Name[16] is the
      // character right after "psll" ('i' = shift-by-immediate, 'v' =
      // per-element variable shift, '.' = shift-by-scalar-in-vector).
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      // The element-size character is the first character after the next '.'
      // (e.g. the 'd' in "avx512.mask.psll.d.128").
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Old-style variable-shift names that encode the element count
        // directly, e.g. "avx512.mask.psllv2.di".
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        // 128-bit vectors map onto the SSE2 shift intrinsics.
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        // 256-bit vectors map onto the AVX2 shift intrinsics.
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        // Remaining names are the 512-bit forms.
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      // Emit the unmasked shift and fold in the mask/passthru operands.
      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      // Masked logical right-shift intrinsics. Name decoding mirrors the
      // psll case above: Name[16] is the character right after "psrl"
      // ('i' = immediate, 'v' = variable, '.' = shift-by-scalar-in-vector).
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      // Element-size character follows the next '.' in the name.
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Old-style variable-shift names that encode the element count.
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        // 128-bit vectors map onto the SSE2 shift intrinsics.
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        // 256-bit vectors map onto the AVX2 shift intrinsics.
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        // Remaining names are the 512-bit forms.
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      // Emit the unmasked shift and fold in the mask/passthru operands.
      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2952     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2953       bool IsImmediate = Name[16] == 'i' ||
2954                          (Name.size() > 18 && Name[18] == 'i');
2955       bool IsVariable = Name[16] == 'v';
2956       char Size = Name[16] == '.' ? Name[17] :
2957                   Name[17] == '.' ? Name[18] :
2958                   Name[18] == '.' ? Name[19] :
2959                                     Name[20];
2960 
2961       Intrinsic::ID IID;
2962       if (IsVariable && Name[17] != '.') {
2963         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2964           IID = Intrinsic::x86_avx2_psrav_d;
2965         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2966           IID = Intrinsic::x86_avx2_psrav_d_256;
2967         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2968           IID = Intrinsic::x86_avx512_psrav_w_128;
2969         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2970           IID = Intrinsic::x86_avx512_psrav_w_256;
2971         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2972           IID = Intrinsic::x86_avx512_psrav_w_512;
2973         else
2974           llvm_unreachable("Unexpected size");
2975       } else if (Name.endswith(".128")) {
2976         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2977           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2978                             : Intrinsic::x86_sse2_psra_d;
2979         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2980           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2981                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
2982                               Intrinsic::x86_avx512_psra_q_128;
2983         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2984           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2985                             : Intrinsic::x86_sse2_psra_w;
2986         else
2987           llvm_unreachable("Unexpected size");
2988       } else if (Name.endswith(".256")) {
2989         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2990           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2991                             : Intrinsic::x86_avx2_psra_d;
2992         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2993           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2994                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
2995                               Intrinsic::x86_avx512_psra_q_256;
2996         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2997           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2998                             : Intrinsic::x86_avx2_psra_w;
2999         else
3000           llvm_unreachable("Unexpected size");
3001       } else {
3002         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3003           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3004                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3005                               Intrinsic::x86_avx512_psra_d_512;
3006         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3007           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3008                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3009                               Intrinsic::x86_avx512_psra_q_512;
3010         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3011           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3012                             : Intrinsic::x86_avx512_psra_w_512;
3013         else
3014           llvm_unreachable("Unexpected size");
3015       }
3016 
3017       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3018     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3019       Rep = upgradeMaskedMove(Builder, *CI);
3020     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3021       Rep = UpgradeMaskToInt(Builder, *CI);
3022     } else if (IsX86 && Name.endswith(".movntdqa")) {
3023       Module *M = F->getParent();
3024       MDNode *Node = MDNode::get(
3025           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3026 
3027       Value *Ptr = CI->getArgOperand(0);
3028       VectorType *VTy = cast<VectorType>(CI->getType());
3029 
3030       // Convert the type of the pointer to a pointer to the stored type.
3031       Value *BC =
3032           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
3033       LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
3034       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3035       Rep = LI;
3036     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3037                          Name.startswith("fma.vfmsub.") ||
3038                          Name.startswith("fma.vfnmadd.") ||
3039                          Name.startswith("fma.vfnmsub."))) {
3040       bool NegMul = Name[6] == 'n';
3041       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3042       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3043 
3044       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3045                        CI->getArgOperand(2) };
3046 
3047       if (IsScalar) {
3048         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3049         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3050         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3051       }
3052 
3053       if (NegMul && !IsScalar)
3054         Ops[0] = Builder.CreateFNeg(Ops[0]);
3055       if (NegMul && IsScalar)
3056         Ops[1] = Builder.CreateFNeg(Ops[1]);
3057       if (NegAcc)
3058         Ops[2] = Builder.CreateFNeg(Ops[2]);
3059 
3060       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3061                                                          Intrinsic::fma,
3062                                                          Ops[0]->getType()),
3063                                Ops);
3064 
3065       if (IsScalar)
3066         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3067                                           (uint64_t)0);
3068     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3069       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3070                        CI->getArgOperand(2) };
3071 
3072       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3073       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3074       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3075 
3076       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3077                                                          Intrinsic::fma,
3078                                                          Ops[0]->getType()),
3079                                Ops);
3080 
3081       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3082                                         Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                         Name.startswith("avx512.maskz.vfmadd.s") ||
                         Name.startswith("avx512.mask.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmsub.s") ||
                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
      // Scalar masked FMA family. Name[11] is the character following
      // "avx512.mask": '3' for mask3 (passthru comes from operand 2), 'z'
      // for maskz (zero-masking), '.' for ordinary merge-masking.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      // After the drop, Name starts with "vfmadd"/"vfmsub"/"vfnmadd"/
      // "vfnmsub"; the 'n' and 's' characters select the negations.
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      // Fold fmsub/fnmadd/fnmsub into a plain fma by negating operands.
      // The multiplicand that serves as the merge-masking passthru (operand
      // 0) is left unnegated; the NegAcc+Mask3 combination is repaired
      // further below.
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      // These are scalar operations on element 0 of the vectors.
      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);

      // Arg 4 is the rounding mode. Anything other than 4
      // (_MM_FROUND_CUR_DIRECTION) requires the rounding-aware AVX-512
      // scalar FMA intrinsic; otherwise the generic llvm.fma suffices.
      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };

        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      // Passthru for a false mask bit: zero for maskz, operand 2's scalar
      // for mask3, operand 0's scalar otherwise.
      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;

      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);

      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      // Re-insert the selected scalar into element 0 of the appropriate
      // source vector (operand 2 for mask3 forms, operand 0 otherwise).
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
3141     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3142                          Name.startswith("avx512.mask.vfnmadd.p") ||
3143                          Name.startswith("avx512.mask.vfnmsub.p") ||
3144                          Name.startswith("avx512.mask3.vfmadd.p") ||
3145                          Name.startswith("avx512.mask3.vfmsub.p") ||
3146                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3147                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3148       bool IsMask3 = Name[11] == '3';
3149       bool IsMaskZ = Name[11] == 'z';
3150       // Drop the "avx512.mask." to make it easier.
3151       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3152       bool NegMul = Name[2] == 'n';
3153       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3154 
3155       Value *A = CI->getArgOperand(0);
3156       Value *B = CI->getArgOperand(1);
3157       Value *C = CI->getArgOperand(2);
3158 
3159       if (NegMul && (IsMask3 || IsMaskZ))
3160         A = Builder.CreateFNeg(A);
3161       if (NegMul && !(IsMask3 || IsMaskZ))
3162         B = Builder.CreateFNeg(B);
3163       if (NegAcc)
3164         C = Builder.CreateFNeg(C);
3165 
3166       if (CI->getNumArgOperands() == 5 &&
3167           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3168            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3169         Intrinsic::ID IID;
3170         // Check the character before ".512" in string.
3171         if (Name[Name.size()-5] == 's')
3172           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3173         else
3174           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3175 
3176         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3177                                  { A, B, C, CI->getArgOperand(4) });
3178       } else {
3179         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3180                                                   Intrinsic::fma,
3181                                                   A->getType());
3182         Rep = Builder.CreateCall(FMA, { A, B, C });
3183       }
3184 
3185       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3186                         IsMask3 ? CI->getArgOperand(2) :
3187                                   CI->getArgOperand(0);
3188 
3189       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3190     } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3191                          Name.startswith("fma.vfmsubadd.p"))) {
3192       bool IsSubAdd = Name[7] == 's';
3193       int NumElts = CI->getType()->getVectorNumElements();
3194 
3195       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3196                        CI->getArgOperand(2) };
3197 
3198       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3199                                                 Ops[0]->getType());
3200       Value *Odd = Builder.CreateCall(FMA, Ops);
3201       Ops[2] = Builder.CreateFNeg(Ops[2]);
3202       Value *Even = Builder.CreateCall(FMA, Ops);
3203 
3204       if (IsSubAdd)
3205         std::swap(Even, Odd);
3206 
3207       SmallVector<uint32_t, 32> Idxs(NumElts);
3208       for (int i = 0; i != NumElts; ++i)
3209         Idxs[i] = i + (i % 2) * NumElts;
3210 
3211       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3212     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3213                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3214                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3215                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3216       bool IsMask3 = Name[11] == '3';
3217       bool IsMaskZ = Name[11] == 'z';
3218       // Drop the "avx512.mask." to make it easier.
3219       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3220       bool IsSubAdd = Name[3] == 's';
3221       if (CI->getNumArgOperands() == 5 &&
3222           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3223            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3224         Intrinsic::ID IID;
3225         // Check the character before ".512" in string.
3226         if (Name[Name.size()-5] == 's')
3227           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3228         else
3229           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3230 
3231         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3232                          CI->getArgOperand(2), CI->getArgOperand(4) };
3233         if (IsSubAdd)
3234           Ops[2] = Builder.CreateFNeg(Ops[2]);
3235 
3236         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3237                                  {CI->getArgOperand(0), CI->getArgOperand(1),
3238                                   CI->getArgOperand(2), CI->getArgOperand(4)});
3239       } else {
3240         int NumElts = CI->getType()->getVectorNumElements();
3241 
3242         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3243                          CI->getArgOperand(2) };
3244 
3245         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3246                                                   Ops[0]->getType());
3247         Value *Odd = Builder.CreateCall(FMA, Ops);
3248         Ops[2] = Builder.CreateFNeg(Ops[2]);
3249         Value *Even = Builder.CreateCall(FMA, Ops);
3250 
3251         if (IsSubAdd)
3252           std::swap(Even, Odd);
3253 
3254         SmallVector<uint32_t, 32> Idxs(NumElts);
3255         for (int i = 0; i != NumElts; ++i)
3256           Idxs[i] = i + (i % 2) * NumElts;
3257 
3258         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3259       }
3260 
3261       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3262                         IsMask3 ? CI->getArgOperand(2) :
3263                                   CI->getArgOperand(0);
3264 
3265       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3266     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3267                          Name.startswith("avx512.maskz.pternlog."))) {
3268       bool ZeroMask = Name[11] == 'z';
3269       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3270       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3271       Intrinsic::ID IID;
3272       if (VecWidth == 128 && EltWidth == 32)
3273         IID = Intrinsic::x86_avx512_pternlog_d_128;
3274       else if (VecWidth == 256 && EltWidth == 32)
3275         IID = Intrinsic::x86_avx512_pternlog_d_256;
3276       else if (VecWidth == 512 && EltWidth == 32)
3277         IID = Intrinsic::x86_avx512_pternlog_d_512;
3278       else if (VecWidth == 128 && EltWidth == 64)
3279         IID = Intrinsic::x86_avx512_pternlog_q_128;
3280       else if (VecWidth == 256 && EltWidth == 64)
3281         IID = Intrinsic::x86_avx512_pternlog_q_256;
3282       else if (VecWidth == 512 && EltWidth == 64)
3283         IID = Intrinsic::x86_avx512_pternlog_q_512;
3284       else
3285         llvm_unreachable("Unexpected intrinsic");
3286 
3287       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3288                         CI->getArgOperand(2), CI->getArgOperand(3) };
3289       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3290                                Args);
3291       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3292                                  : CI->getArgOperand(0);
3293       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3294     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3295                          Name.startswith("avx512.maskz.vpmadd52"))) {
3296       bool ZeroMask = Name[11] == 'z';
3297       bool High = Name[20] == 'h' || Name[21] == 'h';
3298       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3299       Intrinsic::ID IID;
3300       if (VecWidth == 128 && !High)
3301         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3302       else if (VecWidth == 256 && !High)
3303         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3304       else if (VecWidth == 512 && !High)
3305         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3306       else if (VecWidth == 128 && High)
3307         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3308       else if (VecWidth == 256 && High)
3309         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3310       else if (VecWidth == 512 && High)
3311         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3312       else
3313         llvm_unreachable("Unexpected intrinsic");
3314 
3315       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3316                         CI->getArgOperand(2) };
3317       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3318                                Args);
3319       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3320                                  : CI->getArgOperand(0);
3321       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3322     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3323                          Name.startswith("avx512.mask.vpermt2var.") ||
3324                          Name.startswith("avx512.maskz.vpermt2var."))) {
3325       bool ZeroMask = Name[11] == 'z';
3326       bool IndexForm = Name[17] == 'i';
3327       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3328     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3329                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3330                          Name.startswith("avx512.mask.vpdpbusds.") ||
3331                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3332       bool ZeroMask = Name[11] == 'z';
3333       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3334       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3335       Intrinsic::ID IID;
3336       if (VecWidth == 128 && !IsSaturating)
3337         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3338       else if (VecWidth == 256 && !IsSaturating)
3339         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3340       else if (VecWidth == 512 && !IsSaturating)
3341         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3342       else if (VecWidth == 128 && IsSaturating)
3343         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3344       else if (VecWidth == 256 && IsSaturating)
3345         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3346       else if (VecWidth == 512 && IsSaturating)
3347         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3348       else
3349         llvm_unreachable("Unexpected intrinsic");
3350 
3351       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3352                         CI->getArgOperand(2)  };
3353       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3354                                Args);
3355       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3356                                  : CI->getArgOperand(0);
3357       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3358     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3359                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3360                          Name.startswith("avx512.mask.vpdpwssds.") ||
3361                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3362       bool ZeroMask = Name[11] == 'z';
3363       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3364       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3365       Intrinsic::ID IID;
3366       if (VecWidth == 128 && !IsSaturating)
3367         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3368       else if (VecWidth == 256 && !IsSaturating)
3369         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3370       else if (VecWidth == 512 && !IsSaturating)
3371         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3372       else if (VecWidth == 128 && IsSaturating)
3373         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3374       else if (VecWidth == 256 && IsSaturating)
3375         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3376       else if (VecWidth == 512 && IsSaturating)
3377         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3378       else
3379         llvm_unreachable("Unexpected intrinsic");
3380 
3381       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3382                         CI->getArgOperand(2)  };
3383       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3384                                Args);
3385       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3386                                  : CI->getArgOperand(0);
3387       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3388     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3389                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3390                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3391       Intrinsic::ID IID;
3392       if (Name[0] == 'a' && Name.back() == '2')
3393         IID = Intrinsic::x86_addcarry_32;
3394       else if (Name[0] == 'a' && Name.back() == '4')
3395         IID = Intrinsic::x86_addcarry_64;
3396       else if (Name[0] == 's' && Name.back() == '2')
3397         IID = Intrinsic::x86_subborrow_32;
3398       else if (Name[0] == 's' && Name.back() == '4')
3399         IID = Intrinsic::x86_subborrow_64;
3400       else
3401         llvm_unreachable("Unexpected intrinsic");
3402 
3403       // Make a call with 3 operands.
3404       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3405                         CI->getArgOperand(2)};
3406       Value *NewCall = Builder.CreateCall(
3407                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3408                                 Args);
3409 
3410       // Extract the second result and store it.
3411       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3412       // Cast the pointer to the right type.
3413       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3414                                  llvm::PointerType::getUnqual(Data->getType()));
3415       Builder.CreateAlignedStore(Data, Ptr, 1);
3416       // Replace the original call result with the first result of the new call.
3417       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3418 
3419       CI->replaceAllUsesWith(CF);
3420       Rep = nullptr;
3421     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3422                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3423       // Rep will be updated by the call in the condition.
3424     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3425       Value *Arg = CI->getArgOperand(0);
3426       Value *Neg = Builder.CreateNeg(Arg, "neg");
3427       Value *Cmp = Builder.CreateICmpSGE(
3428           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3429       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3430     } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3431                           Name.startswith("atomic.load.add.f64.p"))) {
3432       Value *Ptr = CI->getArgOperand(0);
3433       Value *Val = CI->getArgOperand(1);
3434       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
3435                                     AtomicOrdering::SequentiallyConsistent);
3436     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3437                           Name == "max.ui" || Name == "max.ull")) {
3438       Value *Arg0 = CI->getArgOperand(0);
3439       Value *Arg1 = CI->getArgOperand(1);
3440       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3441                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3442                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3443       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3444     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3445                           Name == "min.ui" || Name == "min.ull")) {
3446       Value *Arg0 = CI->getArgOperand(0);
3447       Value *Arg1 = CI->getArgOperand(1);
3448       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3449                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3450                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3451       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3452     } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3454       Value *Arg = CI->getArgOperand(0);
3455       Value *Ctlz = Builder.CreateCall(
3456           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3457                                     {Arg->getType()}),
3458           {Arg, Builder.getFalse()}, "ctlz");
3459       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3460     } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
3463       Value *Arg = CI->getArgOperand(0);
3464       Value *Popc = Builder.CreateCall(
3465           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3466                                     {Arg->getType()}),
3467           Arg, "ctpop");
3468       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3469     } else if (IsNVVM && Name == "h2f") {
3470       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3471                                    F->getParent(), Intrinsic::convert_from_fp16,
3472                                    {Builder.getFloatTy()}),
3473                                CI->getArgOperand(0), "h2f");
3474     } else {
3475       llvm_unreachable("Unknown function for CallInst upgrade.");
3476     }
3477 
3478     if (Rep)
3479       CI->replaceAllUsesWith(Rep);
3480     CI->eraseFromParent();
3481     return;
3482   }
3483 
3484   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3485     // Handle generic mangling change, but nothing else
3486     assert(
3487         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3488         "Unknown function for CallInst upgrade and isn't just a name change");
3489     CI->setCalledFunction(NewFn);
3490   };
3491   CallInst *NewCall = nullptr;
3492   switch (NewFn->getIntrinsicID()) {
3493   default: {
3494     DefaultCase();
3495     return;
3496   }
3497   case Intrinsic::experimental_vector_reduce_v2_fmul: {
3498     SmallVector<Value *, 2> Args;
3499     if (CI->isFast())
3500       Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
3501     else
3502       Args.push_back(CI->getOperand(0));
3503     Args.push_back(CI->getOperand(1));
3504     NewCall = Builder.CreateCall(NewFn, Args);
3505     cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3506     break;
3507   }
3508   case Intrinsic::experimental_vector_reduce_v2_fadd: {
3509     SmallVector<Value *, 2> Args;
3510     if (CI->isFast())
3511       Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
3512     else
3513       Args.push_back(CI->getOperand(0));
3514     Args.push_back(CI->getOperand(1));
3515     NewCall = Builder.CreateCall(NewFn, Args);
3516     cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3517     break;
3518   }
3519   case Intrinsic::arm_neon_vld1:
3520   case Intrinsic::arm_neon_vld2:
3521   case Intrinsic::arm_neon_vld3:
3522   case Intrinsic::arm_neon_vld4:
3523   case Intrinsic::arm_neon_vld2lane:
3524   case Intrinsic::arm_neon_vld3lane:
3525   case Intrinsic::arm_neon_vld4lane:
3526   case Intrinsic::arm_neon_vst1:
3527   case Intrinsic::arm_neon_vst2:
3528   case Intrinsic::arm_neon_vst3:
3529   case Intrinsic::arm_neon_vst4:
3530   case Intrinsic::arm_neon_vst2lane:
3531   case Intrinsic::arm_neon_vst3lane:
3532   case Intrinsic::arm_neon_vst4lane: {
3533     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3534                                  CI->arg_operands().end());
3535     NewCall = Builder.CreateCall(NewFn, Args);
3536     break;
3537   }
3538 
3539   case Intrinsic::bitreverse:
3540     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3541     break;
3542 
3543   case Intrinsic::ctlz:
3544   case Intrinsic::cttz:
3545     assert(CI->getNumArgOperands() == 1 &&
3546            "Mismatch between function args and call args");
3547     NewCall =
3548         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3549     break;
3550 
3551   case Intrinsic::objectsize: {
3552     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3553                                    ? Builder.getFalse()
3554                                    : CI->getArgOperand(2);
3555     Value *Dynamic =
3556         CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3557     NewCall = Builder.CreateCall(
3558         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3559     break;
3560   }
3561 
3562   case Intrinsic::ctpop:
3563     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3564     break;
3565 
3566   case Intrinsic::convert_from_fp16:
3567     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3568     break;
3569 
3570   case Intrinsic::dbg_value:
3571     // Upgrade from the old version that had an extra offset argument.
3572     assert(CI->getNumArgOperands() == 4);
3573     // Drop nonzero offsets instead of attempting to upgrade them.
3574     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3575       if (Offset->isZeroValue()) {
3576         NewCall = Builder.CreateCall(
3577             NewFn,
3578             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3579         break;
3580       }
3581     CI->eraseFromParent();
3582     return;
3583 
3584   case Intrinsic::x86_xop_vfrcz_ss:
3585   case Intrinsic::x86_xop_vfrcz_sd:
3586     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3587     break;
3588 
3589   case Intrinsic::x86_xop_vpermil2pd:
3590   case Intrinsic::x86_xop_vpermil2ps:
3591   case Intrinsic::x86_xop_vpermil2pd_256:
3592   case Intrinsic::x86_xop_vpermil2ps_256: {
3593     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3594                                  CI->arg_operands().end());
3595     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3596     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3597     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3598     NewCall = Builder.CreateCall(NewFn, Args);
3599     break;
3600   }
3601 
3602   case Intrinsic::x86_sse41_ptestc:
3603   case Intrinsic::x86_sse41_ptestz:
3604   case Intrinsic::x86_sse41_ptestnzc: {
3605     // The arguments for these intrinsics used to be v4f32, and changed
3606     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3607     // So, the only thing required is a bitcast for both arguments.
3608     // First, check the arguments have the old type.
3609     Value *Arg0 = CI->getArgOperand(0);
3610     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3611       return;
3612 
3613     // Old intrinsic, add bitcasts
3614     Value *Arg1 = CI->getArgOperand(1);
3615 
3616     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3617 
3618     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3619     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3620 
3621     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3622     break;
3623   }
3624 
3625   case Intrinsic::x86_rdtscp: {
    // This used to take 1 argument. If we have no arguments, it is already
3627     // upgraded.
3628     if (CI->getNumOperands() == 0)
3629       return;
3630 
3631     NewCall = Builder.CreateCall(NewFn);
3632     // Extract the second result and store it.
3633     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3634     // Cast the pointer to the right type.
3635     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3636                                  llvm::PointerType::getUnqual(Data->getType()));
3637     Builder.CreateAlignedStore(Data, Ptr, 1);
3638     // Replace the original call result with the first result of the new call.
3639     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3640 
3641     std::string Name = CI->getName();
3642     if (!Name.empty()) {
3643       CI->setName(Name + ".old");
3644       NewCall->setName(Name);
3645     }
3646     CI->replaceAllUsesWith(TSC);
3647     CI->eraseFromParent();
3648     return;
3649   }
3650 
3651   case Intrinsic::x86_sse41_insertps:
3652   case Intrinsic::x86_sse41_dppd:
3653   case Intrinsic::x86_sse41_dpps:
3654   case Intrinsic::x86_sse41_mpsadbw:
3655   case Intrinsic::x86_avx_dp_ps_256:
3656   case Intrinsic::x86_avx2_mpsadbw: {
3657     // Need to truncate the last argument from i32 to i8 -- this argument models
3658     // an inherently 8-bit immediate operand to these x86 instructions.
3659     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3660                                  CI->arg_operands().end());
3661 
3662     // Replace the last argument with a trunc.
3663     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3664     NewCall = Builder.CreateCall(NewFn, Args);
3665     break;
3666   }
3667 
3668   case Intrinsic::thread_pointer: {
3669     NewCall = Builder.CreateCall(NewFn, {});
3670     break;
3671   }
3672 
3673   case Intrinsic::invariant_start:
3674   case Intrinsic::invariant_end:
3675   case Intrinsic::masked_load:
3676   case Intrinsic::masked_store:
3677   case Intrinsic::masked_gather:
3678   case Intrinsic::masked_scatter: {
3679     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3680                                  CI->arg_operands().end());
3681     NewCall = Builder.CreateCall(NewFn, Args);
3682     break;
3683   }
3684 
3685   case Intrinsic::memcpy:
3686   case Intrinsic::memmove:
3687   case Intrinsic::memset: {
3688     // We have to make sure that the call signature is what we're expecting.
3689     // We only want to change the old signatures by removing the alignment arg:
3690     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3691     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3692     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3693     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3694     // Note: i8*'s in the above can be any pointer type
3695     if (CI->getNumArgOperands() != 5) {
3696       DefaultCase();
3697       return;
3698     }
3699     // Remove alignment argument (3), and add alignment attributes to the
3700     // dest/src pointers.
3701     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3702                       CI->getArgOperand(2), CI->getArgOperand(4)};
3703     NewCall = Builder.CreateCall(NewFn, Args);
3704     auto *MemCI = cast<MemIntrinsic>(NewCall);
3705     // All mem intrinsics support dest alignment.
3706     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3707     MemCI->setDestAlignment(Align->getZExtValue());
3708     // Memcpy/Memmove also support source alignment.
3709     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3710       MTI->setSourceAlignment(Align->getZExtValue());
3711     break;
3712   }
3713   }
3714   assert(NewCall && "Should have either set this variable or returned through "
3715                     "the default case");
3716   std::string Name = CI->getName();
3717   if (!Name.empty()) {
3718     CI->setName(Name + ".old");
3719     NewCall->setName(Name);
3720   }
3721   CI->replaceAllUsesWith(NewCall);
3722   CI->eraseFromParent();
3723 }
3724 
3725 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3726   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3727 
3728   // Check if this function should be upgraded and get the replacement function
3729   // if there is one.
3730   Function *NewFn;
3731   if (UpgradeIntrinsicFunction(F, NewFn)) {
3732     // Replace all users of the old function with the new function or new
3733     // instructions. This is not a range loop because the call is deleted.
3734     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3735       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3736         UpgradeIntrinsicCall(CI, NewFn);
3737 
3738     // Remove old function, no longer used, from the module.
3739     F->eraseFromParent();
3740   }
3741 }
3742 
3743 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3744   // Check if the tag uses struct-path aware TBAA format.
3745   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3746     return &MD;
3747 
3748   auto &Context = MD.getContext();
3749   if (MD.getNumOperands() == 3) {
3750     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3751     MDNode *ScalarType = MDNode::get(Context, Elts);
3752     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3753     Metadata *Elts2[] = {ScalarType, ScalarType,
3754                          ConstantAsMetadata::get(
3755                              Constant::getNullValue(Type::getInt64Ty(Context))),
3756                          MD.getOperand(2)};
3757     return MDNode::get(Context, Elts2);
3758   }
3759   // Create a MDNode <MD, MD, offset 0>
3760   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3761                                     Type::getInt64Ty(Context)))};
3762   return MDNode::get(Context, Elts);
3763 }
3764 
3765 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3766                                       Instruction *&Temp) {
3767   if (Opc != Instruction::BitCast)
3768     return nullptr;
3769 
3770   Temp = nullptr;
3771   Type *SrcTy = V->getType();
3772   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3773       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3774     LLVMContext &Context = V->getContext();
3775 
3776     // We have no information about target data layout, so we assume that
3777     // the maximum pointer size is 64bit.
3778     Type *MidTy = Type::getInt64Ty(Context);
3779     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3780 
3781     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3782   }
3783 
3784   return nullptr;
3785 }
3786 
3787 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3788   if (Opc != Instruction::BitCast)
3789     return nullptr;
3790 
3791   Type *SrcTy = C->getType();
3792   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3793       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3794     LLVMContext &Context = C->getContext();
3795 
3796     // We have no information about target data layout, so we assume that
3797     // the maximum pointer size is 64bit.
3798     Type *MidTy = Type::getInt64Ty(Context);
3799 
3800     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3801                                      DestTy);
3802   }
3803 
3804   return nullptr;
3805 }
3806 
3807 /// Check the debug info version number, if it is out-dated, drop the debug
3808 /// info. Return true if module is modified.
3809 bool llvm::UpgradeDebugInfo(Module &M) {
3810   unsigned Version = getDebugMetadataVersionFromModule(M);
3811   if (Version == DEBUG_METADATA_VERSION) {
3812     bool BrokenDebugInfo = false;
3813     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3814       report_fatal_error("Broken module found, compilation aborted!");
3815     if (!BrokenDebugInfo)
3816       // Everything is ok.
3817       return false;
3818     else {
3819       // Diagnose malformed debug info.
3820       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3821       M.getContext().diagnose(Diag);
3822     }
3823   }
3824   bool Modified = StripDebugInfo(M);
3825   if (Modified && Version != DEBUG_METADATA_VERSION) {
3826     // Diagnose a version mismatch.
3827     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3828     M.getContext().diagnose(DiagVersion);
3829   }
3830   return Modified;
3831 }
3832 
3833 /// This checks for objc retain release marker which should be upgraded. It
3834 /// returns true if module is modified.
3835 static bool UpgradeRetainReleaseMarker(Module &M) {
3836   bool Changed = false;
3837   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3838   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3839   if (ModRetainReleaseMarker) {
3840     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3841     if (Op) {
3842       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3843       if (ID) {
3844         SmallVector<StringRef, 4> ValueComp;
3845         ID->getString().split(ValueComp, "#");
3846         if (ValueComp.size() == 2) {
3847           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3848           ID = MDString::get(M.getContext(), NewValue);
3849         }
3850         M.addModuleFlag(Module::Error, MarkerKey, ID);
3851         M.eraseNamedMetadata(ModRetainReleaseMarker);
3852         Changed = true;
3853       }
3854     }
3855   }
3856   return Changed;
3857 }
3858 
3859 void llvm::UpgradeARCRuntime(Module &M) {
3860   // This lambda converts normal function calls to ARC runtime functions to
3861   // intrinsic calls.
3862   auto UpgradeToIntrinsic = [&](const char *OldFunc,
3863                                 llvm::Intrinsic::ID IntrinsicFunc) {
3864     Function *Fn = M.getFunction(OldFunc);
3865 
3866     if (!Fn)
3867       return;
3868 
3869     Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
3870 
3871     for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
3872       CallInst *CI = dyn_cast<CallInst>(*I++);
3873       if (!CI || CI->getCalledFunction() != Fn)
3874         continue;
3875 
3876       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
3877       FunctionType *NewFuncTy = NewFn->getFunctionType();
3878       SmallVector<Value *, 2> Args;
3879 
3880       for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
3881         Value *Arg = CI->getArgOperand(I);
3882         // Bitcast argument to the parameter type of the new function if it's
3883         // not a variadic argument.
3884         if (I < NewFuncTy->getNumParams())
3885           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
3886         Args.push_back(Arg);
3887       }
3888 
3889       // Create a call instruction that calls the new function.
3890       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
3891       NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
3892       NewCall->setName(CI->getName());
3893 
3894       // Bitcast the return value back to the type of the old call.
3895       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
3896 
3897       if (!CI->use_empty())
3898         CI->replaceAllUsesWith(NewRetVal);
3899       CI->eraseFromParent();
3900     }
3901 
3902     if (Fn->use_empty())
3903       Fn->eraseFromParent();
3904   };
3905 
3906   // Unconditionally convert a call to "clang.arc.use" to a call to
3907   // "llvm.objc.clang.arc.use".
3908   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
3909 
3910   // Upgrade the retain release marker. If there is no need to upgrade
3911   // the marker, that means either the module is already new enough to contain
3912   // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
3913   if (!UpgradeRetainReleaseMarker(M))
3914     return;
3915 
3916   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
3917       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
3918       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
3919       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
3920       {"objc_autoreleaseReturnValue",
3921        llvm::Intrinsic::objc_autoreleaseReturnValue},
3922       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
3923       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
3924       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
3925       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
3926       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
3927       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
3928       {"objc_release", llvm::Intrinsic::objc_release},
3929       {"objc_retain", llvm::Intrinsic::objc_retain},
3930       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
3931       {"objc_retainAutoreleaseReturnValue",
3932        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
3933       {"objc_retainAutoreleasedReturnValue",
3934        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
3935       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
3936       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
3937       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
3938       {"objc_unsafeClaimAutoreleasedReturnValue",
3939        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
3940       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
3941       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
3942       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
3943       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
3944       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
3945       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
3946       {"objc_arc_annotation_topdown_bbstart",
3947        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
3948       {"objc_arc_annotation_topdown_bbend",
3949        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
3950       {"objc_arc_annotation_bottomup_bbstart",
3951        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
3952       {"objc_arc_annotation_bottomup_bbend",
3953        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
3954 
3955   for (auto &I : RuntimeFuncs)
3956     UpgradeToIntrinsic(I.first, I.second);
3957 }
3958 
3959 bool llvm::UpgradeModuleFlags(Module &M) {
3960   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3961   if (!ModFlags)
3962     return false;
3963 
3964   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3965   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3966     MDNode *Op = ModFlags->getOperand(I);
3967     if (Op->getNumOperands() != 3)
3968       continue;
3969     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3970     if (!ID)
3971       continue;
3972     if (ID->getString() == "Objective-C Image Info Version")
3973       HasObjCFlag = true;
3974     if (ID->getString() == "Objective-C Class Properties")
3975       HasClassProperties = true;
3976     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3977     // field was Error and now they are Max.
3978     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3979       if (auto *Behavior =
3980               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3981         if (Behavior->getLimitedValue() == Module::Error) {
3982           Type *Int32Ty = Type::getInt32Ty(M.getContext());
3983           Metadata *Ops[3] = {
3984               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
3985               MDString::get(M.getContext(), ID->getString()),
3986               Op->getOperand(2)};
3987           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3988           Changed = true;
3989         }
3990       }
3991     }
3992     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
3993     // section name so that llvm-lto will not complain about mismatching
3994     // module flags that is functionally the same.
3995     if (ID->getString() == "Objective-C Image Info Section") {
3996       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3997         SmallVector<StringRef, 4> ValueComp;
3998         Value->getString().split(ValueComp, " ");
3999         if (ValueComp.size() != 1) {
4000           std::string NewValue;
4001           for (auto &S : ValueComp)
4002             NewValue += S.str();
4003           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4004                               MDString::get(M.getContext(), NewValue)};
4005           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4006           Changed = true;
4007         }
4008       }
4009     }
4010   }
4011 
4012   // "Objective-C Class Properties" is recently added for Objective-C. We
4013   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4014   // flag of value 0, so we can correclty downgrade this flag when trying to
4015   // link an ObjC bitcode without this module flag with an ObjC bitcode with
4016   // this module flag.
4017   if (HasObjCFlag && !HasClassProperties) {
4018     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
4019                     (uint32_t)0);
4020     Changed = true;
4021   }
4022 
4023   return Changed;
4024 }
4025 
4026 void llvm::UpgradeSectionAttributes(Module &M) {
4027   auto TrimSpaces = [](StringRef Section) -> std::string {
4028     SmallVector<StringRef, 5> Components;
4029     Section.split(Components, ',');
4030 
4031     SmallString<32> Buffer;
4032     raw_svector_ostream OS(Buffer);
4033 
4034     for (auto Component : Components)
4035       OS << ',' << Component.trim();
4036 
4037     return OS.str().substr(1);
4038   };
4039 
4040   for (auto &GV : M.globals()) {
4041     if (!GV.hasSection())
4042       continue;
4043 
4044     StringRef Section = GV.getSection();
4045 
4046     if (!Section.startswith("__DATA, __objc_catlist"))
4047       continue;
4048 
4049     // __DATA, __objc_catlist, regular, no_dead_strip
4050     // __DATA,__objc_catlist,regular,no_dead_strip
4051     GV.setSection(TrimSpaces(Section));
4052   }
4053 }
4054 
4055 static bool isOldLoopArgument(Metadata *MD) {
4056   auto *T = dyn_cast_or_null<MDTuple>(MD);
4057   if (!T)
4058     return false;
4059   if (T->getNumOperands() < 1)
4060     return false;
4061   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4062   if (!S)
4063     return false;
4064   return S->getString().startswith("llvm.vectorizer.");
4065 }
4066 
4067 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4068   StringRef OldPrefix = "llvm.vectorizer.";
4069   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4070 
4071   if (OldTag == "llvm.vectorizer.unroll")
4072     return MDString::get(C, "llvm.loop.interleave.count");
4073 
4074   return MDString::get(
4075       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4076              .str());
4077 }
4078 
4079 static Metadata *upgradeLoopArgument(Metadata *MD) {
4080   auto *T = dyn_cast_or_null<MDTuple>(MD);
4081   if (!T)
4082     return MD;
4083   if (T->getNumOperands() < 1)
4084     return MD;
4085   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4086   if (!OldTag)
4087     return MD;
4088   if (!OldTag->getString().startswith("llvm.vectorizer."))
4089     return MD;
4090 
4091   // This has an old tag.  Upgrade it.
4092   SmallVector<Metadata *, 8> Ops;
4093   Ops.reserve(T->getNumOperands());
4094   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4095   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4096     Ops.push_back(T->getOperand(I));
4097 
4098   return MDTuple::get(T->getContext(), Ops);
4099 }
4100 
4101 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4102   auto *T = dyn_cast<MDTuple>(&N);
4103   if (!T)
4104     return &N;
4105 
4106   if (none_of(T->operands(), isOldLoopArgument))
4107     return &N;
4108 
4109   SmallVector<Metadata *, 8> Ops;
4110   Ops.reserve(T->getNumOperands());
4111   for (Metadata *MD : T->operands())
4112     Ops.push_back(upgradeLoopArgument(MD));
4113 
4114   return MDTuple::get(T->getContext(), Ops);
4115 }
4116 
4117 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4118   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4119 
4120   // If X86, and the datalayout matches the expected format, add pointer size
4121   // address spaces to the datalayout.
4122   Triple::ArchType Arch = Triple(TT).getArch();
4123   if ((Arch != llvm::Triple::x86 && Arch != llvm::Triple::x86_64) ||
4124       DL.contains(AddrSpaces))
4125     return DL;
4126 
4127   SmallVector<StringRef, 4> Groups;
4128   Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4129   if (!R.match(DL, &Groups))
4130     return DL;
4131 
4132   SmallString<1024> Buf;
4133   std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
4134   return Res;
4135 }
4136