1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/BinaryFormat/Dwarf.h"
19 #include "llvm/IR/AttributeMask.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstVisitor.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/IntrinsicsAArch64.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/IntrinsicsNVPTX.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/IR/IntrinsicsWebAssembly.h"
35 #include "llvm/IR/IntrinsicsX86.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Verifier.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/Regex.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include <cstring>
45
46 using namespace llvm;
47
48 static cl::opt<bool>
49 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
50 cl::desc("Disable autoupgrade of debug info"));
51
rename(GlobalValue * GV)52 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
53
54 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55 // changed their type from v4f32 to v2i64.
upgradePTESTIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)56 static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
57 Function *&NewFn) {
58 // Check whether this is an old version of the function, which received
59 // v4f32 arguments.
60 Type *Arg0Type = F->getFunctionType()->getParamType(0);
61 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62 return false;
63
64 // Yes, it's old, replace it with new version.
65 rename(F);
66 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67 return true;
68 }
69
70 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71 // arguments have changed their type from i32 to i8.
upgradeX86IntrinsicsWith8BitMask(Function * F,Intrinsic::ID IID,Function * & NewFn)72 static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
73 Function *&NewFn) {
74 // Check that the last argument is an i32.
75 Type *LastArgType = F->getFunctionType()->getParamType(
76 F->getFunctionType()->getNumParams() - 1);
77 if (!LastArgType->isIntegerTy(32))
78 return false;
79
80 // Move this function aside and map down.
81 rename(F);
82 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83 return true;
84 }
85
86 // Upgrade the declaration of fp compare intrinsics that change return type
87 // from scalar to vXi1 mask.
upgradeX86MaskedFPCompare(Function * F,Intrinsic::ID IID,Function * & NewFn)88 static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
89 Function *&NewFn) {
90 // Check if the return type is a vector.
91 if (F->getReturnType()->isVectorTy())
92 return false;
93
94 rename(F);
95 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96 return true;
97 }
98
upgradeX86BF16Intrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)99 static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
100 Function *&NewFn) {
101 if (F->getReturnType()->getScalarType()->isBFloatTy())
102 return false;
103
104 rename(F);
105 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106 return true;
107 }
108
upgradeX86BF16DPIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)109 static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
110 Function *&NewFn) {
111 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112 return false;
113
114 rename(F);
115 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116 return true;
117 }
118
shouldUpgradeX86Intrinsic(Function * F,StringRef Name)119 static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
120 // All of the intrinsics matches below should be marked with which llvm
121 // version started autoupgrading them. At some point in the future we would
122 // like to use this information to remove upgrade code for some older
123 // intrinsics. It is currently undecided how we will determine that future
124 // point.
125 if (Name.consume_front("avx."))
126 return (Name.starts_with("blend.p") || // Added in 3.7
127 Name == "cvt.ps2.pd.256" || // Added in 3.9
128 Name == "cvtdq2.pd.256" || // Added in 3.9
129 Name == "cvtdq2.ps.256" || // Added in 7.0
130 Name.starts_with("movnt.") || // Added in 3.2
131 Name.starts_with("sqrt.p") || // Added in 7.0
132 Name.starts_with("storeu.") || // Added in 3.9
133 Name.starts_with("vbroadcast.s") || // Added in 3.5
134 Name.starts_with("vbroadcastf128") || // Added in 4.0
135 Name.starts_with("vextractf128.") || // Added in 3.7
136 Name.starts_with("vinsertf128.") || // Added in 3.7
137 Name.starts_with("vperm2f128.") || // Added in 6.0
138 Name.starts_with("vpermil.")); // Added in 3.1
139
140 if (Name.consume_front("avx2."))
141 return (Name == "movntdqa" || // Added in 5.0
142 Name.starts_with("pabs.") || // Added in 6.0
143 Name.starts_with("padds.") || // Added in 8.0
144 Name.starts_with("paddus.") || // Added in 8.0
145 Name.starts_with("pblendd.") || // Added in 3.7
146 Name == "pblendw" || // Added in 3.7
147 Name.starts_with("pbroadcast") || // Added in 3.8
148 Name.starts_with("pcmpeq.") || // Added in 3.1
149 Name.starts_with("pcmpgt.") || // Added in 3.1
150 Name.starts_with("pmax") || // Added in 3.9
151 Name.starts_with("pmin") || // Added in 3.9
152 Name.starts_with("pmovsx") || // Added in 3.9
153 Name.starts_with("pmovzx") || // Added in 3.9
154 Name == "pmul.dq" || // Added in 7.0
155 Name == "pmulu.dq" || // Added in 7.0
156 Name.starts_with("psll.dq") || // Added in 3.7
157 Name.starts_with("psrl.dq") || // Added in 3.7
158 Name.starts_with("psubs.") || // Added in 8.0
159 Name.starts_with("psubus.") || // Added in 8.0
160 Name.starts_with("vbroadcast") || // Added in 3.8
161 Name == "vbroadcasti128" || // Added in 3.7
162 Name == "vextracti128" || // Added in 3.7
163 Name == "vinserti128" || // Added in 3.7
164 Name == "vperm2i128"); // Added in 6.0
165
166 if (Name.consume_front("avx512.")) {
167 if (Name.consume_front("mask."))
168 // 'avx512.mask.*'
169 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
170 Name.starts_with("and.") || // Added in 3.9
171 Name.starts_with("andn.") || // Added in 3.9
172 Name.starts_with("broadcast.s") || // Added in 3.9
173 Name.starts_with("broadcastf32x4.") || // Added in 6.0
174 Name.starts_with("broadcastf32x8.") || // Added in 6.0
175 Name.starts_with("broadcastf64x2.") || // Added in 6.0
176 Name.starts_with("broadcastf64x4.") || // Added in 6.0
177 Name.starts_with("broadcasti32x4.") || // Added in 6.0
178 Name.starts_with("broadcasti32x8.") || // Added in 6.0
179 Name.starts_with("broadcasti64x2.") || // Added in 6.0
180 Name.starts_with("broadcasti64x4.") || // Added in 6.0
181 Name.starts_with("cmp.b") || // Added in 5.0
182 Name.starts_with("cmp.d") || // Added in 5.0
183 Name.starts_with("cmp.q") || // Added in 5.0
184 Name.starts_with("cmp.w") || // Added in 5.0
185 Name.starts_with("compress.b") || // Added in 9.0
186 Name.starts_with("compress.d") || // Added in 9.0
187 Name.starts_with("compress.p") || // Added in 9.0
188 Name.starts_with("compress.q") || // Added in 9.0
189 Name.starts_with("compress.store.") || // Added in 7.0
190 Name.starts_with("compress.w") || // Added in 9.0
191 Name.starts_with("conflict.") || // Added in 9.0
192 Name.starts_with("cvtdq2pd.") || // Added in 4.0
193 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
194 Name == "cvtpd2dq.256" || // Added in 7.0
195 Name == "cvtpd2ps.256" || // Added in 7.0
196 Name == "cvtps2pd.128" || // Added in 7.0
197 Name == "cvtps2pd.256" || // Added in 7.0
198 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
199 Name == "cvtqq2ps.256" || // Added in 9.0
200 Name == "cvtqq2ps.512" || // Added in 9.0
201 Name == "cvttpd2dq.256" || // Added in 7.0
202 Name == "cvttps2dq.128" || // Added in 7.0
203 Name == "cvttps2dq.256" || // Added in 7.0
204 Name.starts_with("cvtudq2pd.") || // Added in 4.0
205 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
206 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
207 Name == "cvtuqq2ps.256" || // Added in 9.0
208 Name == "cvtuqq2ps.512" || // Added in 9.0
209 Name.starts_with("dbpsadbw.") || // Added in 7.0
210 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
211 Name.starts_with("expand.b") || // Added in 9.0
212 Name.starts_with("expand.d") || // Added in 9.0
213 Name.starts_with("expand.load.") || // Added in 7.0
214 Name.starts_with("expand.p") || // Added in 9.0
215 Name.starts_with("expand.q") || // Added in 9.0
216 Name.starts_with("expand.w") || // Added in 9.0
217 Name.starts_with("fpclass.p") || // Added in 7.0
218 Name.starts_with("insert") || // Added in 4.0
219 Name.starts_with("load.") || // Added in 3.9
220 Name.starts_with("loadu.") || // Added in 3.9
221 Name.starts_with("lzcnt.") || // Added in 5.0
222 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
223 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
224 Name.starts_with("movddup") || // Added in 3.9
225 Name.starts_with("move.s") || // Added in 4.0
226 Name.starts_with("movshdup") || // Added in 3.9
227 Name.starts_with("movsldup") || // Added in 3.9
228 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with("or.") || // Added in 3.9
230 Name.starts_with("pabs.") || // Added in 6.0
231 Name.starts_with("packssdw.") || // Added in 5.0
232 Name.starts_with("packsswb.") || // Added in 5.0
233 Name.starts_with("packusdw.") || // Added in 5.0
234 Name.starts_with("packuswb.") || // Added in 5.0
235 Name.starts_with("padd.") || // Added in 4.0
236 Name.starts_with("padds.") || // Added in 8.0
237 Name.starts_with("paddus.") || // Added in 8.0
238 Name.starts_with("palignr.") || // Added in 3.9
239 Name.starts_with("pand.") || // Added in 3.9
240 Name.starts_with("pandn.") || // Added in 3.9
241 Name.starts_with("pavg") || // Added in 6.0
242 Name.starts_with("pbroadcast") || // Added in 6.0
243 Name.starts_with("pcmpeq.") || // Added in 3.9
244 Name.starts_with("pcmpgt.") || // Added in 3.9
245 Name.starts_with("perm.df.") || // Added in 3.9
246 Name.starts_with("perm.di.") || // Added in 3.9
247 Name.starts_with("permvar.") || // Added in 7.0
248 Name.starts_with("pmaddubs.w.") || // Added in 7.0
249 Name.starts_with("pmaddw.d.") || // Added in 7.0
250 Name.starts_with("pmax") || // Added in 4.0
251 Name.starts_with("pmin") || // Added in 4.0
252 Name == "pmov.qd.256" || // Added in 9.0
253 Name == "pmov.qd.512" || // Added in 9.0
254 Name == "pmov.wb.256" || // Added in 9.0
255 Name == "pmov.wb.512" || // Added in 9.0
256 Name.starts_with("pmovsx") || // Added in 4.0
257 Name.starts_with("pmovzx") || // Added in 4.0
258 Name.starts_with("pmul.dq.") || // Added in 4.0
259 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
260 Name.starts_with("pmulh.w.") || // Added in 7.0
261 Name.starts_with("pmulhu.w.") || // Added in 7.0
262 Name.starts_with("pmull.") || // Added in 4.0
263 Name.starts_with("pmultishift.qb.") || // Added in 8.0
264 Name.starts_with("pmulu.dq.") || // Added in 4.0
265 Name.starts_with("por.") || // Added in 3.9
266 Name.starts_with("prol.") || // Added in 8.0
267 Name.starts_with("prolv.") || // Added in 8.0
268 Name.starts_with("pror.") || // Added in 8.0
269 Name.starts_with("prorv.") || // Added in 8.0
270 Name.starts_with("pshuf.b.") || // Added in 4.0
271 Name.starts_with("pshuf.d.") || // Added in 3.9
272 Name.starts_with("pshufh.w.") || // Added in 3.9
273 Name.starts_with("pshufl.w.") || // Added in 3.9
274 Name.starts_with("psll.d") || // Added in 4.0
275 Name.starts_with("psll.q") || // Added in 4.0
276 Name.starts_with("psll.w") || // Added in 4.0
277 Name.starts_with("pslli") || // Added in 4.0
278 Name.starts_with("psllv") || // Added in 4.0
279 Name.starts_with("psra.d") || // Added in 4.0
280 Name.starts_with("psra.q") || // Added in 4.0
281 Name.starts_with("psra.w") || // Added in 4.0
282 Name.starts_with("psrai") || // Added in 4.0
283 Name.starts_with("psrav") || // Added in 4.0
284 Name.starts_with("psrl.d") || // Added in 4.0
285 Name.starts_with("psrl.q") || // Added in 4.0
286 Name.starts_with("psrl.w") || // Added in 4.0
287 Name.starts_with("psrli") || // Added in 4.0
288 Name.starts_with("psrlv") || // Added in 4.0
289 Name.starts_with("psub.") || // Added in 4.0
290 Name.starts_with("psubs.") || // Added in 8.0
291 Name.starts_with("psubus.") || // Added in 8.0
292 Name.starts_with("pternlog.") || // Added in 7.0
293 Name.starts_with("punpckh") || // Added in 3.9
294 Name.starts_with("punpckl") || // Added in 3.9
295 Name.starts_with("pxor.") || // Added in 3.9
296 Name.starts_with("shuf.f") || // Added in 6.0
297 Name.starts_with("shuf.i") || // Added in 6.0
298 Name.starts_with("shuf.p") || // Added in 4.0
299 Name.starts_with("sqrt.p") || // Added in 7.0
300 Name.starts_with("store.b.") || // Added in 3.9
301 Name.starts_with("store.d.") || // Added in 3.9
302 Name.starts_with("store.p") || // Added in 3.9
303 Name.starts_with("store.q.") || // Added in 3.9
304 Name.starts_with("store.w.") || // Added in 3.9
305 Name == "store.ss" || // Added in 7.0
306 Name.starts_with("storeu.") || // Added in 3.9
307 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
308 Name.starts_with("ucmp.") || // Added in 5.0
309 Name.starts_with("unpckh.") || // Added in 3.9
310 Name.starts_with("unpckl.") || // Added in 3.9
311 Name.starts_with("valign.") || // Added in 4.0
312 Name == "vcvtph2ps.128" || // Added in 11.0
313 Name == "vcvtph2ps.256" || // Added in 11.0
314 Name.starts_with("vextract") || // Added in 4.0
315 Name.starts_with("vfmadd.") || // Added in 7.0
316 Name.starts_with("vfmaddsub.") || // Added in 7.0
317 Name.starts_with("vfnmadd.") || // Added in 7.0
318 Name.starts_with("vfnmsub.") || // Added in 7.0
319 Name.starts_with("vpdpbusd.") || // Added in 7.0
320 Name.starts_with("vpdpbusds.") || // Added in 7.0
321 Name.starts_with("vpdpwssd.") || // Added in 7.0
322 Name.starts_with("vpdpwssds.") || // Added in 7.0
323 Name.starts_with("vpermi2var.") || // Added in 7.0
324 Name.starts_with("vpermil.p") || // Added in 3.9
325 Name.starts_with("vpermilvar.") || // Added in 4.0
326 Name.starts_with("vpermt2var.") || // Added in 7.0
327 Name.starts_with("vpmadd52") || // Added in 7.0
328 Name.starts_with("vpshld.") || // Added in 7.0
329 Name.starts_with("vpshldv.") || // Added in 8.0
330 Name.starts_with("vpshrd.") || // Added in 7.0
331 Name.starts_with("vpshrdv.") || // Added in 8.0
332 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
333 Name.starts_with("xor.")); // Added in 3.9
334
335 if (Name.consume_front("mask3."))
336 // 'avx512.mask3.*'
337 return (Name.starts_with("vfmadd.") || // Added in 7.0
338 Name.starts_with("vfmaddsub.") || // Added in 7.0
339 Name.starts_with("vfmsub.") || // Added in 7.0
340 Name.starts_with("vfmsubadd.") || // Added in 7.0
341 Name.starts_with("vfnmsub.")); // Added in 7.0
342
343 if (Name.consume_front("maskz."))
344 // 'avx512.maskz.*'
345 return (Name.starts_with("pternlog.") || // Added in 7.0
346 Name.starts_with("vfmadd.") || // Added in 7.0
347 Name.starts_with("vfmaddsub.") || // Added in 7.0
348 Name.starts_with("vpdpbusd.") || // Added in 7.0
349 Name.starts_with("vpdpbusds.") || // Added in 7.0
350 Name.starts_with("vpdpwssd.") || // Added in 7.0
351 Name.starts_with("vpdpwssds.") || // Added in 7.0
352 Name.starts_with("vpermt2var.") || // Added in 7.0
353 Name.starts_with("vpmadd52") || // Added in 7.0
354 Name.starts_with("vpshldv.") || // Added in 8.0
355 Name.starts_with("vpshrdv.")); // Added in 8.0
356
357 // 'avx512.*'
358 return (Name == "movntdqa" || // Added in 5.0
359 Name == "pmul.dq.512" || // Added in 7.0
360 Name == "pmulu.dq.512" || // Added in 7.0
361 Name.starts_with("broadcastm") || // Added in 6.0
362 Name.starts_with("cmp.p") || // Added in 12.0
363 Name.starts_with("cvtb2mask.") || // Added in 7.0
364 Name.starts_with("cvtd2mask.") || // Added in 7.0
365 Name.starts_with("cvtmask2") || // Added in 5.0
366 Name.starts_with("cvtq2mask.") || // Added in 7.0
367 Name == "cvtusi2sd" || // Added in 7.0
368 Name.starts_with("cvtw2mask.") || // Added in 7.0
369 Name == "kand.w" || // Added in 7.0
370 Name == "kandn.w" || // Added in 7.0
371 Name == "knot.w" || // Added in 7.0
372 Name == "kor.w" || // Added in 7.0
373 Name == "kortestc.w" || // Added in 7.0
374 Name == "kortestz.w" || // Added in 7.0
375 Name.starts_with("kunpck") || // added in 6.0
376 Name == "kxnor.w" || // Added in 7.0
377 Name == "kxor.w" || // Added in 7.0
378 Name.starts_with("padds.") || // Added in 8.0
379 Name.starts_with("pbroadcast") || // Added in 3.9
380 Name.starts_with("prol") || // Added in 8.0
381 Name.starts_with("pror") || // Added in 8.0
382 Name.starts_with("psll.dq") || // Added in 3.9
383 Name.starts_with("psrl.dq") || // Added in 3.9
384 Name.starts_with("psubs.") || // Added in 8.0
385 Name.starts_with("ptestm") || // Added in 6.0
386 Name.starts_with("ptestnm") || // Added in 6.0
387 Name.starts_with("storent.") || // Added in 3.9
388 Name.starts_with("vbroadcast.s") || // Added in 7.0
389 Name.starts_with("vpshld.") || // Added in 8.0
390 Name.starts_with("vpshrd.")); // Added in 8.0
391 }
392
393 if (Name.consume_front("fma."))
394 return (Name.starts_with("vfmadd.") || // Added in 7.0
395 Name.starts_with("vfmsub.") || // Added in 7.0
396 Name.starts_with("vfmsubadd.") || // Added in 7.0
397 Name.starts_with("vfnmadd.") || // Added in 7.0
398 Name.starts_with("vfnmsub.")); // Added in 7.0
399
400 if (Name.consume_front("fma4."))
401 return Name.starts_with("vfmadd.s"); // Added in 7.0
402
403 if (Name.consume_front("sse."))
404 return (Name == "add.ss" || // Added in 4.0
405 Name == "cvtsi2ss" || // Added in 7.0
406 Name == "cvtsi642ss" || // Added in 7.0
407 Name == "div.ss" || // Added in 4.0
408 Name == "mul.ss" || // Added in 4.0
409 Name.starts_with("sqrt.p") || // Added in 7.0
410 Name == "sqrt.ss" || // Added in 7.0
411 Name.starts_with("storeu.") || // Added in 3.9
412 Name == "sub.ss"); // Added in 4.0
413
414 if (Name.consume_front("sse2."))
415 return (Name == "add.sd" || // Added in 4.0
416 Name == "cvtdq2pd" || // Added in 3.9
417 Name == "cvtdq2ps" || // Added in 7.0
418 Name == "cvtps2pd" || // Added in 3.9
419 Name == "cvtsi2sd" || // Added in 7.0
420 Name == "cvtsi642sd" || // Added in 7.0
421 Name == "cvtss2sd" || // Added in 7.0
422 Name == "div.sd" || // Added in 4.0
423 Name == "mul.sd" || // Added in 4.0
424 Name.starts_with("padds.") || // Added in 8.0
425 Name.starts_with("paddus.") || // Added in 8.0
426 Name.starts_with("pcmpeq.") || // Added in 3.1
427 Name.starts_with("pcmpgt.") || // Added in 3.1
428 Name == "pmaxs.w" || // Added in 3.9
429 Name == "pmaxu.b" || // Added in 3.9
430 Name == "pmins.w" || // Added in 3.9
431 Name == "pminu.b" || // Added in 3.9
432 Name == "pmulu.dq" || // Added in 7.0
433 Name.starts_with("pshuf") || // Added in 3.9
434 Name.starts_with("psll.dq") || // Added in 3.7
435 Name.starts_with("psrl.dq") || // Added in 3.7
436 Name.starts_with("psubs.") || // Added in 8.0
437 Name.starts_with("psubus.") || // Added in 8.0
438 Name.starts_with("sqrt.p") || // Added in 7.0
439 Name == "sqrt.sd" || // Added in 7.0
440 Name == "storel.dq" || // Added in 3.9
441 Name.starts_with("storeu.") || // Added in 3.9
442 Name == "sub.sd"); // Added in 4.0
443
444 if (Name.consume_front("sse41."))
445 return (Name.starts_with("blendp") || // Added in 3.7
446 Name == "movntdqa" || // Added in 5.0
447 Name == "pblendw" || // Added in 3.7
448 Name == "pmaxsb" || // Added in 3.9
449 Name == "pmaxsd" || // Added in 3.9
450 Name == "pmaxud" || // Added in 3.9
451 Name == "pmaxuw" || // Added in 3.9
452 Name == "pminsb" || // Added in 3.9
453 Name == "pminsd" || // Added in 3.9
454 Name == "pminud" || // Added in 3.9
455 Name == "pminuw" || // Added in 3.9
456 Name.starts_with("pmovsx") || // Added in 3.8
457 Name.starts_with("pmovzx") || // Added in 3.9
458 Name == "pmuldq"); // Added in 7.0
459
460 if (Name.consume_front("sse42."))
461 return Name == "crc32.64.8"; // Added in 3.4
462
463 if (Name.consume_front("sse4a."))
464 return Name.starts_with("movnt."); // Added in 3.9
465
466 if (Name.consume_front("ssse3."))
467 return (Name == "pabs.b.128" || // Added in 6.0
468 Name == "pabs.d.128" || // Added in 6.0
469 Name == "pabs.w.128"); // Added in 6.0
470
471 if (Name.consume_front("xop."))
472 return (Name == "vpcmov" || // Added in 3.8
473 Name == "vpcmov.256" || // Added in 5.0
474 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
475 Name.starts_with("vprot")); // Added in 8.0
476
477 return (Name == "addcarry.u32" || // Added in 8.0
478 Name == "addcarry.u64" || // Added in 8.0
479 Name == "addcarryx.u32" || // Added in 8.0
480 Name == "addcarryx.u64" || // Added in 8.0
481 Name == "subborrow.u32" || // Added in 8.0
482 Name == "subborrow.u64" || // Added in 8.0
483 Name.starts_with("vcvtph2ps.")); // Added in 11.0
484 }
485
upgradeX86IntrinsicFunction(Function * F,StringRef Name,Function * & NewFn)486 static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
487 Function *&NewFn) {
488 // Only handle intrinsics that start with "x86.".
489 if (!Name.consume_front("x86."))
490 return false;
491
492 if (shouldUpgradeX86Intrinsic(F, Name)) {
493 NewFn = nullptr;
494 return true;
495 }
496
497 if (Name == "rdtscp") { // Added in 8.0
498 // If this intrinsic has 0 operands, it's the new version.
499 if (F->getFunctionType()->getNumParams() == 0)
500 return false;
501
502 rename(F);
503 NewFn = Intrinsic::getDeclaration(F->getParent(),
504 Intrinsic::x86_rdtscp);
505 return true;
506 }
507
508 Intrinsic::ID ID;
509
510 // SSE4.1 ptest functions may have an old signature.
511 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
512 ID = StringSwitch<Intrinsic::ID>(Name)
513 .Case("c", Intrinsic::x86_sse41_ptestc)
514 .Case("z", Intrinsic::x86_sse41_ptestz)
515 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
516 .Default(Intrinsic::not_intrinsic);
517 if (ID != Intrinsic::not_intrinsic)
518 return upgradePTESTIntrinsic(F, ID, NewFn);
519
520 return false;
521 }
522
523 // Several blend and other instructions with masks used the wrong number of
524 // bits.
525
526 // Added in 3.6
527 ID = StringSwitch<Intrinsic::ID>(Name)
528 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
529 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
530 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
531 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
532 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
533 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
534 .Default(Intrinsic::not_intrinsic);
535 if (ID != Intrinsic::not_intrinsic)
536 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
537
538 if (Name.consume_front("avx512.mask.cmp.")) {
539 // Added in 7.0
540 ID = StringSwitch<Intrinsic::ID>(Name)
541 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
542 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
543 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
544 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
545 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
546 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
547 .Default(Intrinsic::not_intrinsic);
548 if (ID != Intrinsic::not_intrinsic)
549 return upgradeX86MaskedFPCompare(F, ID, NewFn);
550 return false; // No other 'x86.avx523.mask.cmp.*'.
551 }
552
553 if (Name.consume_front("avx512bf16.")) {
554 // Added in 9.0
555 ID = StringSwitch<Intrinsic::ID>(Name)
556 .Case("cvtne2ps2bf16.128",
557 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
558 .Case("cvtne2ps2bf16.256",
559 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
560 .Case("cvtne2ps2bf16.512",
561 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
562 .Case("mask.cvtneps2bf16.128",
563 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
564 .Case("cvtneps2bf16.256",
565 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
566 .Case("cvtneps2bf16.512",
567 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
568 .Default(Intrinsic::not_intrinsic);
569 if (ID != Intrinsic::not_intrinsic)
570 return upgradeX86BF16Intrinsic(F, ID, NewFn);
571
572 // Added in 9.0
573 ID = StringSwitch<Intrinsic::ID>(Name)
574 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
575 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
576 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
577 .Default(Intrinsic::not_intrinsic);
578 if (ID != Intrinsic::not_intrinsic)
579 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
580 return false; // No other 'x86.avx512bf16.*'.
581 }
582
583 if (Name.consume_front("xop.")) {
584 Intrinsic::ID ID = Intrinsic::not_intrinsic;
585 if (Name.starts_with("vpermil2")) { // Added in 3.9
586 // Upgrade any XOP PERMIL2 index operand still using a float/double
587 // vector.
588 auto Idx = F->getFunctionType()->getParamType(2);
589 if (Idx->isFPOrFPVectorTy()) {
590 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
591 unsigned EltSize = Idx->getScalarSizeInBits();
592 if (EltSize == 64 && IdxSize == 128)
593 ID = Intrinsic::x86_xop_vpermil2pd;
594 else if (EltSize == 32 && IdxSize == 128)
595 ID = Intrinsic::x86_xop_vpermil2ps;
596 else if (EltSize == 64 && IdxSize == 256)
597 ID = Intrinsic::x86_xop_vpermil2pd_256;
598 else
599 ID = Intrinsic::x86_xop_vpermil2ps_256;
600 }
601 } else if (F->arg_size() == 2)
602 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
603 ID = StringSwitch<Intrinsic::ID>(Name)
604 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
605 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
606 .Default(Intrinsic::not_intrinsic);
607
608 if (ID != Intrinsic::not_intrinsic) {
609 rename(F);
610 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
611 return true;
612 }
613 return false; // No other 'x86.xop.*'
614 }
615
616 if (Name == "seh.recoverfp") {
617 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
618 return true;
619 }
620
621 return false;
622 }
623
624 // Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
625 // IsArm: 'arm.*', !IsArm: 'aarch64.*'.
upgradeArmOrAarch64IntrinsicFunction(bool IsArm,Function * F,StringRef Name,Function * & NewFn)626 static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
627 StringRef Name,
628 Function *&NewFn) {
629 if (Name.starts_with("rbit")) {
630 // '(arm|aarch64).rbit'.
631 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
632 F->arg_begin()->getType());
633 return true;
634 }
635
636 if (Name == "thread.pointer") {
637 // '(arm|aarch64).thread.pointer'.
638 NewFn =
639 Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
640 return true;
641 }
642
643 bool Neon = Name.consume_front("neon.");
644 if (Neon) {
645 // '(arm|aarch64).neon.*'.
646 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
647 // v16i8 respectively.
648 if (Name.consume_front("bfdot.")) {
649 // (arm|aarch64).neon.bfdot.*'.
650 Intrinsic::ID ID =
651 StringSwitch<Intrinsic::ID>(Name)
652 .Cases("v2f32.v8i8", "v4f32.v16i8",
653 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
654 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
655 .Default(Intrinsic::not_intrinsic);
656 if (ID != Intrinsic::not_intrinsic) {
657 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
658 assert((OperandWidth == 64 || OperandWidth == 128) &&
659 "Unexpected operand width");
660 LLVMContext &Ctx = F->getParent()->getContext();
661 std::array<Type *, 2> Tys{
662 {F->getReturnType(),
663 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
664 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
665 return true;
666 }
667 return false; // No other '(arm|aarch64).neon.bfdot.*'.
668 }
669
670 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
671 // anymore and accept v8bf16 instead of v16i8.
672 if (Name.consume_front("bfm")) {
673 // (arm|aarch64).neon.bfm*'.
674 if (Name.consume_back(".v4f32.v16i8")) {
675 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
676 Intrinsic::ID ID =
677 StringSwitch<Intrinsic::ID>(Name)
678 .Case("mla",
679 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
680 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
681 .Case("lalb",
682 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
683 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
684 .Case("lalt",
685 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
686 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
687 .Default(Intrinsic::not_intrinsic);
688 if (ID != Intrinsic::not_intrinsic) {
689 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
690 return true;
691 }
692 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
693 }
694 return false; // No other '(arm|aarch64).neon.bfm*.
695 }
696 // Continue on to Aarch64 Neon or Arm Neon.
697 }
698 // Continue on to Arm or Aarch64.
699
700 if (IsArm) {
701 // 'arm.*'.
702 if (Neon) {
703 // 'arm.neon.*'.
704 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
705 .StartsWith("vclz.", Intrinsic::ctlz)
706 .StartsWith("vcnt.", Intrinsic::ctpop)
707 .StartsWith("vqadds.", Intrinsic::sadd_sat)
708 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
709 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
710 .StartsWith("vqsubu.", Intrinsic::usub_sat)
711 .Default(Intrinsic::not_intrinsic);
712 if (ID != Intrinsic::not_intrinsic) {
713 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
714 F->arg_begin()->getType());
715 return true;
716 }
717
718 if (Name.consume_front("vst")) {
719 // 'arm.neon.vst*'.
720 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
721 SmallVector<StringRef, 2> Groups;
722 if (vstRegex.match(Name, &Groups)) {
723 static const Intrinsic::ID StoreInts[] = {
724 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
725 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
726
727 static const Intrinsic::ID StoreLaneInts[] = {
728 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
729 Intrinsic::arm_neon_vst4lane};
730
731 auto fArgs = F->getFunctionType()->params();
732 Type *Tys[] = {fArgs[0], fArgs[1]};
733 if (Groups[1].size() == 1)
734 NewFn = Intrinsic::getDeclaration(F->getParent(),
735 StoreInts[fArgs.size() - 3], Tys);
736 else
737 NewFn = Intrinsic::getDeclaration(
738 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
739 return true;
740 }
741 return false; // No other 'arm.neon.vst*'.
742 }
743
744 return false; // No other 'arm.neon.*'.
745 }
746
747 if (Name.consume_front("mve.")) {
748 // 'arm.mve.*'.
749 if (Name == "vctp64") {
750 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
751 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
752 // the function and deal with it below in UpgradeIntrinsicCall.
753 rename(F);
754 return true;
755 }
756 return false; // Not 'arm.mve.vctp64'.
757 }
758
759 // These too are changed to accept a v2i1 instead of the old v4i1.
760 if (Name.consume_back(".v4i1")) {
761 // 'arm.mve.*.v4i1'.
762 if (Name.consume_back(".predicated.v2i64.v4i32"))
763 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
764 return Name == "mull.int" || Name == "vqdmull";
765
766 if (Name.consume_back(".v2i64")) {
767 // 'arm.mve.*.v2i64.v4i1'
768 bool IsGather = Name.consume_front("vldr.gather.");
769 if (IsGather || Name.consume_front("vstr.scatter.")) {
770 if (Name.consume_front("base.")) {
771 // Optional 'wb.' prefix.
772 Name.consume_front("wb.");
773 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
774 // predicated.v2i64.v2i64.v4i1'.
775 return Name == "predicated.v2i64";
776 }
777
778 if (Name.consume_front("offset.predicated."))
779 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
780 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
781
782 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
783 return false;
784 }
785
786 return false; // No other 'arm.mve.*.v2i64.v4i1'.
787 }
788 return false; // No other 'arm.mve.*.v4i1'.
789 }
790 return false; // No other 'arm.mve.*'.
791 }
792
793 if (Name.consume_front("cde.vcx")) {
794 // 'arm.cde.vcx*'.
795 if (Name.consume_back(".predicated.v2i64.v4i1"))
796 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
797 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
798 Name == "3q" || Name == "3qa";
799
800 return false; // No other 'arm.cde.vcx*'.
801 }
802 } else {
803 // 'aarch64.*'.
804 if (Neon) {
805 // 'aarch64.neon.*'.
806 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
807 .StartsWith("frintn", Intrinsic::roundeven)
808 .StartsWith("rbit", Intrinsic::bitreverse)
809 .Default(Intrinsic::not_intrinsic);
810 if (ID != Intrinsic::not_intrinsic) {
811 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
812 F->arg_begin()->getType());
813 return true;
814 }
815
816 if (Name.starts_with("addp")) {
817 // 'aarch64.neon.addp*'.
818 if (F->arg_size() != 2)
819 return false; // Invalid IR.
820 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
821 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
822 NewFn = Intrinsic::getDeclaration(F->getParent(),
823 Intrinsic::aarch64_neon_faddp, Ty);
824 return true;
825 }
826 }
827 return false; // No other 'aarch64.neon.*'.
828 }
829 if (Name.consume_front("sve.")) {
830 // 'aarch64.sve.*'.
831 if (Name.consume_front("bf")) {
832 if (Name.consume_back(".lane")) {
833 // 'aarch64.sve.bf*.lane'.
834 Intrinsic::ID ID =
835 StringSwitch<Intrinsic::ID>(Name)
836 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
837 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
838 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
839 .Default(Intrinsic::not_intrinsic);
840 if (ID != Intrinsic::not_intrinsic) {
841 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
842 return true;
843 }
844 return false; // No other 'aarch64.sve.bf*.lane'.
845 }
846 return false; // No other 'aarch64.sve.bf*'.
847 }
848
849 if (Name.consume_front("addqv")) {
850 // 'aarch64.sve.addqv'.
851 if (!F->getReturnType()->isFPOrFPVectorTy())
852 return false;
853
854 auto Args = F->getFunctionType()->params();
855 Type *Tys[] = {F->getReturnType(), Args[1]};
856 NewFn = Intrinsic::getDeclaration(F->getParent(),
857 Intrinsic::aarch64_sve_faddqv, Tys);
858 return true;
859 }
860
861 if (Name.consume_front("ld")) {
862 // 'aarch64.sve.ld*'.
863 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
864 if (LdRegex.match(Name)) {
865 Type *ScalarTy =
866 cast<VectorType>(F->getReturnType())->getElementType();
867 ElementCount EC =
868 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
869 Type *Ty = VectorType::get(ScalarTy, EC);
870 static const Intrinsic::ID LoadIDs[] = {
871 Intrinsic::aarch64_sve_ld2_sret,
872 Intrinsic::aarch64_sve_ld3_sret,
873 Intrinsic::aarch64_sve_ld4_sret,
874 };
875 NewFn = Intrinsic::getDeclaration(F->getParent(),
876 LoadIDs[Name[0] - '2'], Ty);
877 return true;
878 }
879 return false; // No other 'aarch64.sve.ld*'.
880 }
881
882 if (Name.consume_front("tuple.")) {
883 // 'aarch64.sve.tuple.*'.
884 if (Name.starts_with("get")) {
885 // 'aarch64.sve.tuple.get*'.
886 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
887 NewFn = Intrinsic::getDeclaration(F->getParent(),
888 Intrinsic::vector_extract, Tys);
889 return true;
890 }
891
892 if (Name.starts_with("set")) {
893 // 'aarch64.sve.tuple.set*'.
894 auto Args = F->getFunctionType()->params();
895 Type *Tys[] = {Args[0], Args[2], Args[1]};
896 NewFn = Intrinsic::getDeclaration(F->getParent(),
897 Intrinsic::vector_insert, Tys);
898 return true;
899 }
900
901 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
902 if (CreateTupleRegex.match(Name)) {
903 // 'aarch64.sve.tuple.create*'.
904 auto Args = F->getFunctionType()->params();
905 Type *Tys[] = {F->getReturnType(), Args[1]};
906 NewFn = Intrinsic::getDeclaration(F->getParent(),
907 Intrinsic::vector_insert, Tys);
908 return true;
909 }
910 return false; // No other 'aarch64.sve.tuple.*'.
911 }
912 return false; // No other 'aarch64.sve.*'.
913 }
914 }
915 return false; // No other 'arm.*', 'aarch64.*'.
916 }
917
shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)918 static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
919 if (Name.consume_front("abs."))
920 return StringSwitch<Intrinsic::ID>(Name)
921 .Case("bf16", Intrinsic::nvvm_abs_bf16)
922 .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
923 .Default(Intrinsic::not_intrinsic);
924
925 if (Name.consume_front("fma.rn."))
926 return StringSwitch<Intrinsic::ID>(Name)
927 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
928 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
929 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
930 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
931 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
932 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
933 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
934 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
935 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
936 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
937 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
938 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
939 .Default(Intrinsic::not_intrinsic);
940
941 if (Name.consume_front("fmax."))
942 return StringSwitch<Intrinsic::ID>(Name)
943 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
944 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
945 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
946 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
947 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
948 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
949 .Case("ftz.nan.xorsign.abs.bf16",
950 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
951 .Case("ftz.nan.xorsign.abs.bf16x2",
952 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
953 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
954 .Case("ftz.xorsign.abs.bf16x2",
955 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
956 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
957 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
958 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
959 .Case("nan.xorsign.abs.bf16x2",
960 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
961 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
962 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
963 .Default(Intrinsic::not_intrinsic);
964
965 if (Name.consume_front("fmin."))
966 return StringSwitch<Intrinsic::ID>(Name)
967 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
968 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
969 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
970 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
971 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
972 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
973 .Case("ftz.nan.xorsign.abs.bf16",
974 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
975 .Case("ftz.nan.xorsign.abs.bf16x2",
976 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
977 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
978 .Case("ftz.xorsign.abs.bf16x2",
979 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
980 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
981 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
982 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
983 .Case("nan.xorsign.abs.bf16x2",
984 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
985 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
986 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
987 .Default(Intrinsic::not_intrinsic);
988
989 if (Name.consume_front("neg."))
990 return StringSwitch<Intrinsic::ID>(Name)
991 .Case("bf16", Intrinsic::nvvm_neg_bf16)
992 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
993 .Default(Intrinsic::not_intrinsic);
994
995 return Intrinsic::not_intrinsic;
996 }
997
upgradeIntrinsicFunction1(Function * F,Function * & NewFn,bool CanUpgradeDebugIntrinsicsToRecords)998 static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
999 bool CanUpgradeDebugIntrinsicsToRecords) {
1000 assert(F && "Illegal to upgrade a non-existent Function.");
1001
1002 StringRef Name = F->getName();
1003
1004 // Quickly eliminate it, if it's not a candidate.
1005 if (!Name.consume_front("llvm.") || Name.empty())
1006 return false;
1007
1008 switch (Name[0]) {
1009 default: break;
1010 case 'a': {
1011 bool IsArm = Name.consume_front("arm.");
1012 if (IsArm || Name.consume_front("aarch64.")) {
1013 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1014 return true;
1015 break;
1016 }
1017
1018 if (Name.consume_front("amdgcn.")) {
1019 if (Name == "alignbit") {
1020 // Target specific intrinsic became redundant
1021 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
1022 {F->getReturnType()});
1023 return true;
1024 }
1025
1026 if (Name.consume_front("atomic.")) {
1027 if (Name.starts_with("inc") || Name.starts_with("dec")) {
1028 // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1029 // there's no new declaration.
1030 NewFn = nullptr;
1031 return true;
1032 }
1033 break; // No other 'amdgcn.atomic.*'
1034 }
1035
1036 if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
1037 Name.starts_with("ds.fmax")) {
1038 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1039 // declaration.
1040 NewFn = nullptr;
1041 return true;
1042 }
1043
1044 if (Name.starts_with("ldexp.")) {
1045 // Target specific intrinsic became redundant
1046 NewFn = Intrinsic::getDeclaration(
1047 F->getParent(), Intrinsic::ldexp,
1048 {F->getReturnType(), F->getArg(1)->getType()});
1049 return true;
1050 }
1051 break; // No other 'amdgcn.*'
1052 }
1053
1054 break;
1055 }
1056 case 'c': {
1057 if (F->arg_size() == 1) {
1058 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1059 .StartsWith("ctlz.", Intrinsic::ctlz)
1060 .StartsWith("cttz.", Intrinsic::cttz)
1061 .Default(Intrinsic::not_intrinsic);
1062 if (ID != Intrinsic::not_intrinsic) {
1063 rename(F);
1064 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
1065 F->arg_begin()->getType());
1066 return true;
1067 }
1068 }
1069
1070 if (F->arg_size() == 2 && Name == "coro.end") {
1071 rename(F);
1072 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
1073 return true;
1074 }
1075
1076 break;
1077 }
1078 case 'd':
1079 if (Name.consume_front("dbg.")) {
1080 // Mark debug intrinsics for upgrade to new debug format.
1081 if (CanUpgradeDebugIntrinsicsToRecords &&
1082 F->getParent()->IsNewDbgInfoFormat) {
1083 if (Name == "addr" || Name == "value" || Name == "assign" ||
1084 Name == "declare" || Name == "label") {
1085 // There's no function to replace these with.
1086 NewFn = nullptr;
1087 // But we do want these to get upgraded.
1088 return true;
1089 }
1090 }
1091 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1092 // converted to DbgVariableRecords later.
1093 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1094 rename(F);
1095 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
1096 return true;
1097 }
1098 break; // No other 'dbg.*'.
1099 }
1100 break;
1101 case 'e':
1102 if (Name.consume_front("experimental.vector.")) {
1103 Intrinsic::ID ID =
1104 StringSwitch<Intrinsic::ID>(Name)
1105 .StartsWith("extract.", Intrinsic::vector_extract)
1106 .StartsWith("insert.", Intrinsic::vector_insert)
1107 .StartsWith("splice.", Intrinsic::vector_splice)
1108 .StartsWith("reverse.", Intrinsic::vector_reverse)
1109 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1110 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1111 .Default(Intrinsic::not_intrinsic);
1112 if (ID != Intrinsic::not_intrinsic) {
1113 const auto *FT = F->getFunctionType();
1114 SmallVector<Type *, 2> Tys;
1115 if (ID == Intrinsic::vector_extract ||
1116 ID == Intrinsic::vector_interleave2)
1117 // Extracting overloads the return type.
1118 Tys.push_back(FT->getReturnType());
1119 if (ID != Intrinsic::vector_interleave2)
1120 Tys.push_back(FT->getParamType(0));
1121 if (ID == Intrinsic::vector_insert)
1122 // Inserting overloads the inserted type.
1123 Tys.push_back(FT->getParamType(1));
1124 rename(F);
1125 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1126 return true;
1127 }
1128
1129 if (Name.consume_front("reduce.")) {
1130 SmallVector<StringRef, 2> Groups;
1131 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1132 if (R.match(Name, &Groups))
1133 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1134 .Case("add", Intrinsic::vector_reduce_add)
1135 .Case("mul", Intrinsic::vector_reduce_mul)
1136 .Case("and", Intrinsic::vector_reduce_and)
1137 .Case("or", Intrinsic::vector_reduce_or)
1138 .Case("xor", Intrinsic::vector_reduce_xor)
1139 .Case("smax", Intrinsic::vector_reduce_smax)
1140 .Case("smin", Intrinsic::vector_reduce_smin)
1141 .Case("umax", Intrinsic::vector_reduce_umax)
1142 .Case("umin", Intrinsic::vector_reduce_umin)
1143 .Case("fmax", Intrinsic::vector_reduce_fmax)
1144 .Case("fmin", Intrinsic::vector_reduce_fmin)
1145 .Default(Intrinsic::not_intrinsic);
1146
1147 bool V2 = false;
1148 if (ID == Intrinsic::not_intrinsic) {
1149 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1150 Groups.clear();
1151 V2 = true;
1152 if (R2.match(Name, &Groups))
1153 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1154 .Case("fadd", Intrinsic::vector_reduce_fadd)
1155 .Case("fmul", Intrinsic::vector_reduce_fmul)
1156 .Default(Intrinsic::not_intrinsic);
1157 }
1158 if (ID != Intrinsic::not_intrinsic) {
1159 rename(F);
1160 auto Args = F->getFunctionType()->params();
1161 NewFn =
1162 Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
1163 return true;
1164 }
1165 break; // No other 'expermental.vector.reduce.*'.
1166 }
1167 break; // No other 'experimental.vector.*'.
1168 }
1169 break; // No other 'e*'.
1170 case 'f':
1171 if (Name.starts_with("flt.rounds")) {
1172 rename(F);
1173 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1174 return true;
1175 }
1176 break;
1177 case 'i':
1178 if (Name.starts_with("invariant.group.barrier")) {
1179 // Rename invariant.group.barrier to launder.invariant.group
1180 auto Args = F->getFunctionType()->params();
1181 Type* ObjectPtr[1] = {Args[0]};
1182 rename(F);
1183 NewFn = Intrinsic::getDeclaration(F->getParent(),
1184 Intrinsic::launder_invariant_group, ObjectPtr);
1185 return true;
1186 }
1187 break;
1188 case 'm': {
1189 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1190 // alignment parameter to embedding the alignment as an attribute of
1191 // the pointer args.
1192 if (unsigned ID = StringSwitch<unsigned>(Name)
1193 .StartsWith("memcpy.", Intrinsic::memcpy)
1194 .StartsWith("memmove.", Intrinsic::memmove)
1195 .Default(0)) {
1196 if (F->arg_size() == 5) {
1197 rename(F);
1198 // Get the types of dest, src, and len
1199 ArrayRef<Type *> ParamTypes =
1200 F->getFunctionType()->params().slice(0, 3);
1201 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
1202 return true;
1203 }
1204 }
1205 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1206 rename(F);
1207 // Get the types of dest, and len
1208 const auto *FT = F->getFunctionType();
1209 Type *ParamTypes[2] = {
1210 FT->getParamType(0), // Dest
1211 FT->getParamType(2) // len
1212 };
1213 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1214 ParamTypes);
1215 return true;
1216 }
1217 break;
1218 }
1219 case 'n': {
1220 if (Name.consume_front("nvvm.")) {
1221 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1222 if (F->arg_size() == 1) {
1223 Intrinsic::ID IID =
1224 StringSwitch<Intrinsic::ID>(Name)
1225 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1226 .Case("clz.i", Intrinsic::ctlz)
1227 .Case("popc.i", Intrinsic::ctpop)
1228 .Default(Intrinsic::not_intrinsic);
1229 if (IID != Intrinsic::not_intrinsic) {
1230 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1231 {F->getReturnType()});
1232 return true;
1233 }
1234 }
1235
1236 // Check for nvvm intrinsics that need a return type adjustment.
1237 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1238 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1239 if (IID != Intrinsic::not_intrinsic) {
1240 NewFn = nullptr;
1241 return true;
1242 }
1243 }
1244
1245 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1246 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1247 //
1248 // TODO: We could add lohi.i2d.
1249 bool Expand = false;
1250 if (Name.consume_front("abs."))
1251 // nvvm.abs.{i,ii}
1252 Expand = Name == "i" || Name == "ll";
1253 else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1254 Expand = true;
1255 else if (Name.consume_front("max.") || Name.consume_front("min."))
1256 // nvvm.{min,max}.{i,ii,ui,ull}
1257 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1258 Name == "ui" || Name == "ull";
1259 else if (Name.consume_front("atomic.load.add."))
1260 // nvvm.atomic.load.add.{f32.p,f64.p}
1261 Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1262 else
1263 Expand = false;
1264
1265 if (Expand) {
1266 NewFn = nullptr;
1267 return true;
1268 }
1269 break; // No other 'nvvm.*'.
1270 }
1271 break;
1272 }
1273 case 'o':
1274 // We only need to change the name to match the mangling including the
1275 // address space.
1276 if (Name.starts_with("objectsize.")) {
1277 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1278 if (F->arg_size() == 2 || F->arg_size() == 3 ||
1279 F->getName() !=
1280 Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1281 rename(F);
1282 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1283 Tys);
1284 return true;
1285 }
1286 }
1287 break;
1288
1289 case 'p':
1290 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1291 rename(F);
1292 NewFn = Intrinsic::getDeclaration(
1293 F->getParent(), Intrinsic::ptr_annotation,
1294 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1295 return true;
1296 }
1297 break;
1298
1299 case 'r': {
1300 if (Name.consume_front("riscv.")) {
1301 Intrinsic::ID ID;
1302 ID = StringSwitch<Intrinsic::ID>(Name)
1303 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1304 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1305 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1306 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1307 .Default(Intrinsic::not_intrinsic);
1308 if (ID != Intrinsic::not_intrinsic) {
1309 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1310 rename(F);
1311 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1312 return true;
1313 }
1314 break; // No other applicable upgrades.
1315 }
1316
1317 ID = StringSwitch<Intrinsic::ID>(Name)
1318 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1319 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1320 .Default(Intrinsic::not_intrinsic);
1321 if (ID != Intrinsic::not_intrinsic) {
1322 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1323 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1324 rename(F);
1325 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1326 return true;
1327 }
1328 break; // No other applicable upgrades.
1329 }
1330
1331 ID = StringSwitch<Intrinsic::ID>(Name)
1332 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1333 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1334 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1335 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1336 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1337 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1338 .Default(Intrinsic::not_intrinsic);
1339 if (ID != Intrinsic::not_intrinsic) {
1340 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1341 rename(F);
1342 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1343 return true;
1344 }
1345 break; // No other applicable upgrades.
1346 }
1347 break; // No other 'riscv.*' intrinsics
1348 }
1349 } break;
1350
1351 case 's':
1352 if (Name == "stackprotectorcheck") {
1353 NewFn = nullptr;
1354 return true;
1355 }
1356 break;
1357
1358 case 'v': {
1359 if (Name == "var.annotation" && F->arg_size() == 4) {
1360 rename(F);
1361 NewFn = Intrinsic::getDeclaration(
1362 F->getParent(), Intrinsic::var_annotation,
1363 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1364 return true;
1365 }
1366 break;
1367 }
1368
1369 case 'w':
1370 if (Name.consume_front("wasm.")) {
1371 Intrinsic::ID ID =
1372 StringSwitch<Intrinsic::ID>(Name)
1373 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1374 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1375 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1376 .Default(Intrinsic::not_intrinsic);
1377 if (ID != Intrinsic::not_intrinsic) {
1378 rename(F);
1379 NewFn =
1380 Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1381 return true;
1382 }
1383
1384 if (Name.consume_front("dot.i8x16.i7x16.")) {
1385 ID = StringSwitch<Intrinsic::ID>(Name)
1386 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1387 .Case("add.signed",
1388 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1389 .Default(Intrinsic::not_intrinsic);
1390 if (ID != Intrinsic::not_intrinsic) {
1391 rename(F);
1392 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1393 return true;
1394 }
1395 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1396 }
1397 break; // No other 'wasm.*'.
1398 }
1399 break;
1400
1401 case 'x':
1402 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1403 return true;
1404 }
1405
1406 auto *ST = dyn_cast<StructType>(F->getReturnType());
1407 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1408 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1409 // Replace return type with literal non-packed struct. Only do this for
1410 // intrinsics declared to return a struct, not for intrinsics with
1411 // overloaded return type, in which case the exact struct type will be
1412 // mangled into the name.
1413 SmallVector<Intrinsic::IITDescriptor> Desc;
1414 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1415 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1416 auto *FT = F->getFunctionType();
1417 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1418 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1419 std::string Name = F->getName().str();
1420 rename(F);
1421 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1422 Name, F->getParent());
1423
1424 // The new function may also need remangling.
1425 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1426 NewFn = *Result;
1427 return true;
1428 }
1429 }
1430
1431 // Remangle our intrinsic since we upgrade the mangling
1432 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1433 if (Result != std::nullopt) {
1434 NewFn = *Result;
1435 return true;
1436 }
1437
1438 // This may not belong here. This function is effectively being overloaded
1439 // to both detect an intrinsic which needs upgrading, and to provide the
1440 // upgraded form of the intrinsic. We should perhaps have two separate
1441 // functions for this.
1442 return false;
1443 }
1444
UpgradeIntrinsicFunction(Function * F,Function * & NewFn,bool CanUpgradeDebugIntrinsicsToRecords)1445 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1446 bool CanUpgradeDebugIntrinsicsToRecords) {
1447 NewFn = nullptr;
1448 bool Upgraded =
1449 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1450 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1451
1452 // Upgrade intrinsic attributes. This does not change the function.
1453 if (NewFn)
1454 F = NewFn;
1455 if (Intrinsic::ID id = F->getIntrinsicID())
1456 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1457 return Upgraded;
1458 }
1459
UpgradeGlobalVariable(GlobalVariable * GV)1460 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1461 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1462 GV->getName() == "llvm.global_dtors")) ||
1463 !GV->hasInitializer())
1464 return nullptr;
1465 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1466 if (!ATy)
1467 return nullptr;
1468 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1469 if (!STy || STy->getNumElements() != 2)
1470 return nullptr;
1471
1472 LLVMContext &C = GV->getContext();
1473 IRBuilder<> IRB(C);
1474 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1475 IRB.getPtrTy());
1476 Constant *Init = GV->getInitializer();
1477 unsigned N = Init->getNumOperands();
1478 std::vector<Constant *> NewCtors(N);
1479 for (unsigned i = 0; i != N; ++i) {
1480 auto Ctor = cast<Constant>(Init->getOperand(i));
1481 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1482 Ctor->getAggregateElement(1),
1483 Constant::getNullValue(IRB.getPtrTy()));
1484 }
1485 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1486
1487 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1488 NewInit, GV->getName());
1489 }
1490
1491 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1492 // to byte shuffles.
upgradeX86PSLLDQIntrinsics(IRBuilder<> & Builder,Value * Op,unsigned Shift)1493 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1494 unsigned Shift) {
1495 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1496 unsigned NumElts = ResultTy->getNumElements() * 8;
1497
1498 // Bitcast from a 64-bit element type to a byte element type.
1499 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1500 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1501
1502 // We'll be shuffling in zeroes.
1503 Value *Res = Constant::getNullValue(VecTy);
1504
1505 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1506 // we'll just return the zero vector.
1507 if (Shift < 16) {
1508 int Idxs[64];
1509 // 256/512-bit version is split into 2/4 16-byte lanes.
1510 for (unsigned l = 0; l != NumElts; l += 16)
1511 for (unsigned i = 0; i != 16; ++i) {
1512 unsigned Idx = NumElts + i - Shift;
1513 if (Idx < NumElts)
1514 Idx -= NumElts - 16; // end of lane, switch operand.
1515 Idxs[l + i] = Idx + l;
1516 }
1517
1518 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1519 }
1520
1521 // Bitcast back to a 64-bit element type.
1522 return Builder.CreateBitCast(Res, ResultTy, "cast");
1523 }
1524
1525 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1526 // to byte shuffles.
upgradeX86PSRLDQIntrinsics(IRBuilder<> & Builder,Value * Op,unsigned Shift)1527 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1528 unsigned Shift) {
1529 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1530 unsigned NumElts = ResultTy->getNumElements() * 8;
1531
1532 // Bitcast from a 64-bit element type to a byte element type.
1533 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1534 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1535
1536 // We'll be shuffling in zeroes.
1537 Value *Res = Constant::getNullValue(VecTy);
1538
1539 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1540 // we'll just return the zero vector.
1541 if (Shift < 16) {
1542 int Idxs[64];
1543 // 256/512-bit version is split into 2/4 16-byte lanes.
1544 for (unsigned l = 0; l != NumElts; l += 16)
1545 for (unsigned i = 0; i != 16; ++i) {
1546 unsigned Idx = i + Shift;
1547 if (Idx >= 16)
1548 Idx += NumElts - 16; // end of lane, switch operand.
1549 Idxs[l + i] = Idx + l;
1550 }
1551
1552 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1553 }
1554
1555 // Bitcast back to a 64-bit element type.
1556 return Builder.CreateBitCast(Res, ResultTy, "cast");
1557 }
1558
getX86MaskVec(IRBuilder<> & Builder,Value * Mask,unsigned NumElts)1559 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1560 unsigned NumElts) {
1561 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1562 llvm::VectorType *MaskTy = FixedVectorType::get(
1563 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1564 Mask = Builder.CreateBitCast(Mask, MaskTy);
1565
1566 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1567 // i8 and we need to extract down to the right number of elements.
1568 if (NumElts <= 4) {
1569 int Indices[4];
1570 for (unsigned i = 0; i != NumElts; ++i)
1571 Indices[i] = i;
1572 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1573 "extract");
1574 }
1575
1576 return Mask;
1577 }
1578
emitX86Select(IRBuilder<> & Builder,Value * Mask,Value * Op0,Value * Op1)1579 static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1580 Value *Op1) {
1581 // If the mask is all ones just emit the first operation.
1582 if (const auto *C = dyn_cast<Constant>(Mask))
1583 if (C->isAllOnesValue())
1584 return Op0;
1585
1586 Mask = getX86MaskVec(Builder, Mask,
1587 cast<FixedVectorType>(Op0->getType())->getNumElements());
1588 return Builder.CreateSelect(Mask, Op0, Op1);
1589 }
1590
emitX86ScalarSelect(IRBuilder<> & Builder,Value * Mask,Value * Op0,Value * Op1)1591 static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1592 Value *Op1) {
1593 // If the mask is all ones just emit the first operation.
1594 if (const auto *C = dyn_cast<Constant>(Mask))
1595 if (C->isAllOnesValue())
1596 return Op0;
1597
1598 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1599 Mask->getType()->getIntegerBitWidth());
1600 Mask = Builder.CreateBitCast(Mask, MaskTy);
1601 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1602 return Builder.CreateSelect(Mask, Op0, Op1);
1603 }
1604
1605 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1606 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1607 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
upgradeX86ALIGNIntrinsics(IRBuilder<> & Builder,Value * Op0,Value * Op1,Value * Shift,Value * Passthru,Value * Mask,bool IsVALIGN)1608 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1609 Value *Op1, Value *Shift,
1610 Value *Passthru, Value *Mask,
1611 bool IsVALIGN) {
1612 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1613
1614 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1615 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1616 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1617 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1618
1619 // Mask the immediate for VALIGN.
1620 if (IsVALIGN)
1621 ShiftVal &= (NumElts - 1);
1622
1623 // If palignr is shifting the pair of vectors more than the size of two
1624 // lanes, emit zero.
1625 if (ShiftVal >= 32)
1626 return llvm::Constant::getNullValue(Op0->getType());
1627
1628 // If palignr is shifting the pair of input vectors more than one lane,
1629 // but less than two lanes, convert to shifting in zeroes.
1630 if (ShiftVal > 16) {
1631 ShiftVal -= 16;
1632 Op1 = Op0;
1633 Op0 = llvm::Constant::getNullValue(Op0->getType());
1634 }
1635
1636 int Indices[64];
1637 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1638 for (unsigned l = 0; l < NumElts; l += 16) {
1639 for (unsigned i = 0; i != 16; ++i) {
1640 unsigned Idx = ShiftVal + i;
1641 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1642 Idx += NumElts - 16; // End of lane, switch operand.
1643 Indices[l + i] = Idx + l;
1644 }
1645 }
1646
1647 Value *Align = Builder.CreateShuffleVector(
1648 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1649
1650 return emitX86Select(Builder, Mask, Align, Passthru);
1651 }
1652
upgradeX86VPERMT2Intrinsics(IRBuilder<> & Builder,CallBase & CI,bool ZeroMask,bool IndexForm)1653 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1654 bool ZeroMask, bool IndexForm) {
1655 Type *Ty = CI.getType();
1656 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1657 unsigned EltWidth = Ty->getScalarSizeInBits();
1658 bool IsFloat = Ty->isFPOrFPVectorTy();
1659 Intrinsic::ID IID;
1660 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1661 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1662 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1663 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1664 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1665 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1666 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1667 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1668 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1669 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1670 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1671 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1672 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1673 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1674 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1675 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1676 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1677 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1678 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1679 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1680 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1681 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1682 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1683 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1684 else if (VecWidth == 128 && EltWidth == 16)
1685 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1686 else if (VecWidth == 256 && EltWidth == 16)
1687 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1688 else if (VecWidth == 512 && EltWidth == 16)
1689 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1690 else if (VecWidth == 128 && EltWidth == 8)
1691 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1692 else if (VecWidth == 256 && EltWidth == 8)
1693 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1694 else if (VecWidth == 512 && EltWidth == 8)
1695 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1696 else
1697 llvm_unreachable("Unexpected intrinsic");
1698
1699 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1700 CI.getArgOperand(2) };
1701
1702 // If this isn't index form we need to swap operand 0 and 1.
1703 if (!IndexForm)
1704 std::swap(Args[0], Args[1]);
1705
1706 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1707 Args);
1708 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1709 : Builder.CreateBitCast(CI.getArgOperand(1),
1710 Ty);
1711 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1712 }
1713
upgradeX86BinaryIntrinsics(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)1714 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1715 Intrinsic::ID IID) {
1716 Type *Ty = CI.getType();
1717 Value *Op0 = CI.getOperand(0);
1718 Value *Op1 = CI.getOperand(1);
1719 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1720 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1721
1722 if (CI.arg_size() == 4) { // For masked intrinsics.
1723 Value *VecSrc = CI.getOperand(2);
1724 Value *Mask = CI.getOperand(3);
1725 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1726 }
1727 return Res;
1728 }
1729
upgradeX86Rotate(IRBuilder<> & Builder,CallBase & CI,bool IsRotateRight)1730 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1731 bool IsRotateRight) {
1732 Type *Ty = CI.getType();
1733 Value *Src = CI.getArgOperand(0);
1734 Value *Amt = CI.getArgOperand(1);
1735
1736 // Amount may be scalar immediate, in which case create a splat vector.
1737 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1738 // we only care about the lowest log2 bits anyway.
1739 if (Amt->getType() != Ty) {
1740 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1741 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1742 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1743 }
1744
1745 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1746 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1747 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1748
1749 if (CI.arg_size() == 4) { // For masked intrinsics.
1750 Value *VecSrc = CI.getOperand(2);
1751 Value *Mask = CI.getOperand(3);
1752 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1753 }
1754 return Res;
1755 }
1756
upgradeX86vpcom(IRBuilder<> & Builder,CallBase & CI,unsigned Imm,bool IsSigned)1757 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1758 bool IsSigned) {
1759 Type *Ty = CI.getType();
1760 Value *LHS = CI.getArgOperand(0);
1761 Value *RHS = CI.getArgOperand(1);
1762
1763 CmpInst::Predicate Pred;
1764 switch (Imm) {
1765 case 0x0:
1766 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1767 break;
1768 case 0x1:
1769 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1770 break;
1771 case 0x2:
1772 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1773 break;
1774 case 0x3:
1775 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1776 break;
1777 case 0x4:
1778 Pred = ICmpInst::ICMP_EQ;
1779 break;
1780 case 0x5:
1781 Pred = ICmpInst::ICMP_NE;
1782 break;
1783 case 0x6:
1784 return Constant::getNullValue(Ty); // FALSE
1785 case 0x7:
1786 return Constant::getAllOnesValue(Ty); // TRUE
1787 default:
1788 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1789 }
1790
1791 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1792 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1793 return Ext;
1794 }
1795
upgradeX86ConcatShift(IRBuilder<> & Builder,CallBase & CI,bool IsShiftRight,bool ZeroMask)1796 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1797 bool IsShiftRight, bool ZeroMask) {
1798 Type *Ty = CI.getType();
1799 Value *Op0 = CI.getArgOperand(0);
1800 Value *Op1 = CI.getArgOperand(1);
1801 Value *Amt = CI.getArgOperand(2);
1802
1803 if (IsShiftRight)
1804 std::swap(Op0, Op1);
1805
1806 // Amount may be scalar immediate, in which case create a splat vector.
1807 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1808 // we only care about the lowest log2 bits anyway.
1809 if (Amt->getType() != Ty) {
1810 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1811 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1812 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1813 }
1814
1815 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1816 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1817 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1818
1819 unsigned NumArgs = CI.arg_size();
1820 if (NumArgs >= 4) { // For masked intrinsics.
1821 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1822 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1823 CI.getArgOperand(0);
1824 Value *Mask = CI.getOperand(NumArgs - 1);
1825 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1826 }
1827 return Res;
1828 }
1829
upgradeMaskedStore(IRBuilder<> & Builder,Value * Ptr,Value * Data,Value * Mask,bool Aligned)1830 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1831 Value *Mask, bool Aligned) {
1832 // Cast the pointer to the right type.
1833 Ptr = Builder.CreateBitCast(Ptr,
1834 llvm::PointerType::getUnqual(Data->getType()));
1835 const Align Alignment =
1836 Aligned
1837 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1838 : Align(1);
1839
1840 // If the mask is all ones just emit a regular store.
1841 if (const auto *C = dyn_cast<Constant>(Mask))
1842 if (C->isAllOnesValue())
1843 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1844
1845 // Convert the mask from an integer type to a vector of i1.
1846 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1847 Mask = getX86MaskVec(Builder, Mask, NumElts);
1848 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1849 }
1850
upgradeMaskedLoad(IRBuilder<> & Builder,Value * Ptr,Value * Passthru,Value * Mask,bool Aligned)1851 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1852 Value *Passthru, Value *Mask, bool Aligned) {
1853 Type *ValTy = Passthru->getType();
1854 // Cast the pointer to the right type.
1855 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1856 const Align Alignment =
1857 Aligned
1858 ? Align(
1859 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1860 8)
1861 : Align(1);
1862
1863 // If the mask is all ones just emit a regular store.
1864 if (const auto *C = dyn_cast<Constant>(Mask))
1865 if (C->isAllOnesValue())
1866 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1867
1868 // Convert the mask from an integer type to a vector of i1.
1869 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1870 Mask = getX86MaskVec(Builder, Mask, NumElts);
1871 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1872 }
1873
upgradeAbs(IRBuilder<> & Builder,CallBase & CI)1874 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1875 Type *Ty = CI.getType();
1876 Value *Op0 = CI.getArgOperand(0);
1877 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1878 Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1879 if (CI.arg_size() == 3)
1880 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1881 return Res;
1882 }
1883
upgradePMULDQ(IRBuilder<> & Builder,CallBase & CI,bool IsSigned)1884 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1885 Type *Ty = CI.getType();
1886
1887 // Arguments have a vXi32 type so cast to vXi64.
1888 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1889 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1890
1891 if (IsSigned) {
1892 // Shift left then arithmetic shift right.
1893 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1894 LHS = Builder.CreateShl(LHS, ShiftAmt);
1895 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1896 RHS = Builder.CreateShl(RHS, ShiftAmt);
1897 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1898 } else {
1899 // Clear the upper bits.
1900 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1901 LHS = Builder.CreateAnd(LHS, Mask);
1902 RHS = Builder.CreateAnd(RHS, Mask);
1903 }
1904
1905 Value *Res = Builder.CreateMul(LHS, RHS);
1906
1907 if (CI.arg_size() == 4)
1908 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1909
1910 return Res;
1911 }
1912
1913 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
applyX86MaskOn1BitsVec(IRBuilder<> & Builder,Value * Vec,Value * Mask)1914 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1915 Value *Mask) {
1916 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1917 if (Mask) {
1918 const auto *C = dyn_cast<Constant>(Mask);
1919 if (!C || !C->isAllOnesValue())
1920 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1921 }
1922
1923 if (NumElts < 8) {
1924 int Indices[8];
1925 for (unsigned i = 0; i != NumElts; ++i)
1926 Indices[i] = i;
1927 for (unsigned i = NumElts; i != 8; ++i)
1928 Indices[i] = NumElts + i % NumElts;
1929 Vec = Builder.CreateShuffleVector(Vec,
1930 Constant::getNullValue(Vec->getType()),
1931 Indices);
1932 }
1933 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1934 }
1935
upgradeMaskedCompare(IRBuilder<> & Builder,CallBase & CI,unsigned CC,bool Signed)1936 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1937 unsigned CC, bool Signed) {
1938 Value *Op0 = CI.getArgOperand(0);
1939 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1940
1941 Value *Cmp;
1942 if (CC == 3) {
1943 Cmp = Constant::getNullValue(
1944 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1945 } else if (CC == 7) {
1946 Cmp = Constant::getAllOnesValue(
1947 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1948 } else {
1949 ICmpInst::Predicate Pred;
1950 switch (CC) {
1951 default: llvm_unreachable("Unknown condition code");
1952 case 0: Pred = ICmpInst::ICMP_EQ; break;
1953 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1954 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1955 case 4: Pred = ICmpInst::ICMP_NE; break;
1956 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1957 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1958 }
1959 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1960 }
1961
1962 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1963
1964 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1965 }
1966
1967 // Replace a masked intrinsic with an older unmasked intrinsic.
upgradeX86MaskedShift(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)1968 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1969 Intrinsic::ID IID) {
1970 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1971 Value *Rep = Builder.CreateCall(Intrin,
1972 { CI.getArgOperand(0), CI.getArgOperand(1) });
1973 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1974 }
1975
upgradeMaskedMove(IRBuilder<> & Builder,CallBase & CI)1976 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1977 Value* A = CI.getArgOperand(0);
1978 Value* B = CI.getArgOperand(1);
1979 Value* Src = CI.getArgOperand(2);
1980 Value* Mask = CI.getArgOperand(3);
1981
1982 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1983 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1984 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1985 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1986 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1987 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1988 }
1989
upgradeMaskToInt(IRBuilder<> & Builder,CallBase & CI)1990 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1991 Value* Op = CI.getArgOperand(0);
1992 Type* ReturnOp = CI.getType();
1993 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1994 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1995 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1996 }
1997
1998 // Replace intrinsic with unmasked version and a select.
upgradeAVX512MaskToSelect(StringRef Name,IRBuilder<> & Builder,CallBase & CI,Value * & Rep)1999 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2000 CallBase &CI, Value *&Rep) {
2001 Name = Name.substr(12); // Remove avx512.mask.
2002
2003 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2004 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2005 Intrinsic::ID IID;
2006 if (Name.starts_with("max.p")) {
2007 if (VecWidth == 128 && EltWidth == 32)
2008 IID = Intrinsic::x86_sse_max_ps;
2009 else if (VecWidth == 128 && EltWidth == 64)
2010 IID = Intrinsic::x86_sse2_max_pd;
2011 else if (VecWidth == 256 && EltWidth == 32)
2012 IID = Intrinsic::x86_avx_max_ps_256;
2013 else if (VecWidth == 256 && EltWidth == 64)
2014 IID = Intrinsic::x86_avx_max_pd_256;
2015 else
2016 llvm_unreachable("Unexpected intrinsic");
2017 } else if (Name.starts_with("min.p")) {
2018 if (VecWidth == 128 && EltWidth == 32)
2019 IID = Intrinsic::x86_sse_min_ps;
2020 else if (VecWidth == 128 && EltWidth == 64)
2021 IID = Intrinsic::x86_sse2_min_pd;
2022 else if (VecWidth == 256 && EltWidth == 32)
2023 IID = Intrinsic::x86_avx_min_ps_256;
2024 else if (VecWidth == 256 && EltWidth == 64)
2025 IID = Intrinsic::x86_avx_min_pd_256;
2026 else
2027 llvm_unreachable("Unexpected intrinsic");
2028 } else if (Name.starts_with("pshuf.b.")) {
2029 if (VecWidth == 128)
2030 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2031 else if (VecWidth == 256)
2032 IID = Intrinsic::x86_avx2_pshuf_b;
2033 else if (VecWidth == 512)
2034 IID = Intrinsic::x86_avx512_pshuf_b_512;
2035 else
2036 llvm_unreachable("Unexpected intrinsic");
2037 } else if (Name.starts_with("pmul.hr.sw.")) {
2038 if (VecWidth == 128)
2039 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2040 else if (VecWidth == 256)
2041 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2042 else if (VecWidth == 512)
2043 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2044 else
2045 llvm_unreachable("Unexpected intrinsic");
2046 } else if (Name.starts_with("pmulh.w.")) {
2047 if (VecWidth == 128)
2048 IID = Intrinsic::x86_sse2_pmulh_w;
2049 else if (VecWidth == 256)
2050 IID = Intrinsic::x86_avx2_pmulh_w;
2051 else if (VecWidth == 512)
2052 IID = Intrinsic::x86_avx512_pmulh_w_512;
2053 else
2054 llvm_unreachable("Unexpected intrinsic");
2055 } else if (Name.starts_with("pmulhu.w.")) {
2056 if (VecWidth == 128)
2057 IID = Intrinsic::x86_sse2_pmulhu_w;
2058 else if (VecWidth == 256)
2059 IID = Intrinsic::x86_avx2_pmulhu_w;
2060 else if (VecWidth == 512)
2061 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2062 else
2063 llvm_unreachable("Unexpected intrinsic");
2064 } else if (Name.starts_with("pmaddw.d.")) {
2065 if (VecWidth == 128)
2066 IID = Intrinsic::x86_sse2_pmadd_wd;
2067 else if (VecWidth == 256)
2068 IID = Intrinsic::x86_avx2_pmadd_wd;
2069 else if (VecWidth == 512)
2070 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with("pmaddubs.w.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with("packsswb.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_sse2_packsswb_128;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_packsswb;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_packsswb_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with("packssdw.")) {
2092 if (VecWidth == 128)
2093 IID = Intrinsic::x86_sse2_packssdw_128;
2094 else if (VecWidth == 256)
2095 IID = Intrinsic::x86_avx2_packssdw;
2096 else if (VecWidth == 512)
2097 IID = Intrinsic::x86_avx512_packssdw_512;
2098 else
2099 llvm_unreachable("Unexpected intrinsic");
2100 } else if (Name.starts_with("packuswb.")) {
2101 if (VecWidth == 128)
2102 IID = Intrinsic::x86_sse2_packuswb_128;
2103 else if (VecWidth == 256)
2104 IID = Intrinsic::x86_avx2_packuswb;
2105 else if (VecWidth == 512)
2106 IID = Intrinsic::x86_avx512_packuswb_512;
2107 else
2108 llvm_unreachable("Unexpected intrinsic");
2109 } else if (Name.starts_with("packusdw.")) {
2110 if (VecWidth == 128)
2111 IID = Intrinsic::x86_sse41_packusdw;
2112 else if (VecWidth == 256)
2113 IID = Intrinsic::x86_avx2_packusdw;
2114 else if (VecWidth == 512)
2115 IID = Intrinsic::x86_avx512_packusdw_512;
2116 else
2117 llvm_unreachable("Unexpected intrinsic");
2118 } else if (Name.starts_with("vpermilvar.")) {
2119 if (VecWidth == 128 && EltWidth == 32)
2120 IID = Intrinsic::x86_avx_vpermilvar_ps;
2121 else if (VecWidth == 128 && EltWidth == 64)
2122 IID = Intrinsic::x86_avx_vpermilvar_pd;
2123 else if (VecWidth == 256 && EltWidth == 32)
2124 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2125 else if (VecWidth == 256 && EltWidth == 64)
2126 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2127 else if (VecWidth == 512 && EltWidth == 32)
2128 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2129 else if (VecWidth == 512 && EltWidth == 64)
2130 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2131 else
2132 llvm_unreachable("Unexpected intrinsic");
2133 } else if (Name == "cvtpd2dq.256") {
2134 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2135 } else if (Name == "cvtpd2ps.256") {
2136 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2137 } else if (Name == "cvttpd2dq.256") {
2138 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2139 } else if (Name == "cvttps2dq.128") {
2140 IID = Intrinsic::x86_sse2_cvttps2dq;
2141 } else if (Name == "cvttps2dq.256") {
2142 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2143 } else if (Name.starts_with("permvar.")) {
2144 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2145 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146 IID = Intrinsic::x86_avx2_permps;
2147 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148 IID = Intrinsic::x86_avx2_permd;
2149 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150 IID = Intrinsic::x86_avx512_permvar_df_256;
2151 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152 IID = Intrinsic::x86_avx512_permvar_di_256;
2153 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154 IID = Intrinsic::x86_avx512_permvar_sf_512;
2155 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156 IID = Intrinsic::x86_avx512_permvar_si_512;
2157 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158 IID = Intrinsic::x86_avx512_permvar_df_512;
2159 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160 IID = Intrinsic::x86_avx512_permvar_di_512;
2161 else if (VecWidth == 128 && EltWidth == 16)
2162 IID = Intrinsic::x86_avx512_permvar_hi_128;
2163 else if (VecWidth == 256 && EltWidth == 16)
2164 IID = Intrinsic::x86_avx512_permvar_hi_256;
2165 else if (VecWidth == 512 && EltWidth == 16)
2166 IID = Intrinsic::x86_avx512_permvar_hi_512;
2167 else if (VecWidth == 128 && EltWidth == 8)
2168 IID = Intrinsic::x86_avx512_permvar_qi_128;
2169 else if (VecWidth == 256 && EltWidth == 8)
2170 IID = Intrinsic::x86_avx512_permvar_qi_256;
2171 else if (VecWidth == 512 && EltWidth == 8)
2172 IID = Intrinsic::x86_avx512_permvar_qi_512;
2173 else
2174 llvm_unreachable("Unexpected intrinsic");
2175 } else if (Name.starts_with("dbpsadbw.")) {
2176 if (VecWidth == 128)
2177 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2178 else if (VecWidth == 256)
2179 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2180 else if (VecWidth == 512)
2181 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2182 else
2183 llvm_unreachable("Unexpected intrinsic");
2184 } else if (Name.starts_with("pmultishift.qb.")) {
2185 if (VecWidth == 128)
2186 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2187 else if (VecWidth == 256)
2188 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2189 else if (VecWidth == 512)
2190 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2191 else
2192 llvm_unreachable("Unexpected intrinsic");
2193 } else if (Name.starts_with("conflict.")) {
2194 if (Name[9] == 'd' && VecWidth == 128)
2195 IID = Intrinsic::x86_avx512_conflict_d_128;
2196 else if (Name[9] == 'd' && VecWidth == 256)
2197 IID = Intrinsic::x86_avx512_conflict_d_256;
2198 else if (Name[9] == 'd' && VecWidth == 512)
2199 IID = Intrinsic::x86_avx512_conflict_d_512;
2200 else if (Name[9] == 'q' && VecWidth == 128)
2201 IID = Intrinsic::x86_avx512_conflict_q_128;
2202 else if (Name[9] == 'q' && VecWidth == 256)
2203 IID = Intrinsic::x86_avx512_conflict_q_256;
2204 else if (Name[9] == 'q' && VecWidth == 512)
2205 IID = Intrinsic::x86_avx512_conflict_q_512;
2206 else
2207 llvm_unreachable("Unexpected intrinsic");
2208 } else if (Name.starts_with("pavg.")) {
2209 if (Name[5] == 'b' && VecWidth == 128)
2210 IID = Intrinsic::x86_sse2_pavg_b;
2211 else if (Name[5] == 'b' && VecWidth == 256)
2212 IID = Intrinsic::x86_avx2_pavg_b;
2213 else if (Name[5] == 'b' && VecWidth == 512)
2214 IID = Intrinsic::x86_avx512_pavg_b_512;
2215 else if (Name[5] == 'w' && VecWidth == 128)
2216 IID = Intrinsic::x86_sse2_pavg_w;
2217 else if (Name[5] == 'w' && VecWidth == 256)
2218 IID = Intrinsic::x86_avx2_pavg_w;
2219 else if (Name[5] == 'w' && VecWidth == 512)
2220 IID = Intrinsic::x86_avx512_pavg_w_512;
2221 else
2222 llvm_unreachable("Unexpected intrinsic");
2223 } else
2224 return false;
2225
2226 SmallVector<Value *, 4> Args(CI.args());
2227 Args.pop_back();
2228 Args.pop_back();
2229 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2230 Args);
2231 unsigned NumArgs = CI.arg_size();
2232 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2233 CI.getArgOperand(NumArgs - 2));
2234 return true;
2235 }
2236
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
///
/// If \p AsmStr starts with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' of
/// the first "# marker" is replaced in place with ';'. Any other string is
/// left unchanged.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind with a start position of 0 can only match at offset 0, so this is a
  // pure prefix test that does not scan the rest of the string on a mismatch.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2247
upgradeX86IntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)2248 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2249 IRBuilder<> &Builder) {
2250 LLVMContext &C = F->getContext();
2251 Value *Rep = nullptr;
2252
2253 if (Name.starts_with("sse4a.movnt.")) {
2254 SmallVector<Metadata *, 1> Elts;
2255 Elts.push_back(
2256 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2257 MDNode *Node = MDNode::get(C, Elts);
2258
2259 Value *Arg0 = CI->getArgOperand(0);
2260 Value *Arg1 = CI->getArgOperand(1);
2261
2262 // Nontemporal (unaligned) store of the 0'th element of the float/double
2263 // vector.
2264 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2265 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2266 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2267 Value *Extract =
2268 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2269
2270 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2271 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2272 } else if (Name.starts_with("avx.movnt.") ||
2273 Name.starts_with("avx512.storent.")) {
2274 SmallVector<Metadata *, 1> Elts;
2275 Elts.push_back(
2276 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2277 MDNode *Node = MDNode::get(C, Elts);
2278
2279 Value *Arg0 = CI->getArgOperand(0);
2280 Value *Arg1 = CI->getArgOperand(1);
2281
2282 // Convert the type of the pointer to a pointer to the stored type.
2283 Value *BC = Builder.CreateBitCast(
2284 Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
2285 StoreInst *SI = Builder.CreateAlignedStore(
2286 Arg1, BC,
2287 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2288 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2289 } else if (Name == "sse2.storel.dq") {
2290 Value *Arg0 = CI->getArgOperand(0);
2291 Value *Arg1 = CI->getArgOperand(1);
2292
2293 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2294 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2295 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2296 Value *BC = Builder.CreateBitCast(
2297 Arg0, PointerType::getUnqual(Elt->getType()), "cast");
2298 Builder.CreateAlignedStore(Elt, BC, Align(1));
2299 } else if (Name.starts_with("sse.storeu.") ||
2300 Name.starts_with("sse2.storeu.") ||
2301 Name.starts_with("avx.storeu.")) {
2302 Value *Arg0 = CI->getArgOperand(0);
2303 Value *Arg1 = CI->getArgOperand(1);
2304
2305 Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
2306 "cast");
2307 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2308 } else if (Name == "avx512.mask.store.ss") {
2309 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2310 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2311 Mask, false);
2312 } else if (Name.starts_with("avx512.mask.store")) {
2313 // "avx512.mask.storeu." or "avx512.mask.store."
2314 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2315 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2316 CI->getArgOperand(2), Aligned);
2317 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2318 // Upgrade packed integer vector compare intrinsics to compare instructions.
2319 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2320 bool CmpEq = Name[9] == 'e';
2321 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2322 CI->getArgOperand(0), CI->getArgOperand(1));
2323 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2324 } else if (Name.starts_with("avx512.broadcastm")) {
2325 Type *ExtTy = Type::getInt32Ty(C);
2326 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2327 ExtTy = Type::getInt64Ty(C);
2328 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2329 ExtTy->getPrimitiveSizeInBits();
2330 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2331 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2332 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2333 Value *Vec = CI->getArgOperand(0);
2334 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2335 Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,
2336 Elt0->getType());
2337 Elt0 = Builder.CreateCall(Intr, Elt0);
2338 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2339 } else if (Name.starts_with("avx.sqrt.p") ||
2340 Name.starts_with("sse2.sqrt.p") ||
2341 Name.starts_with("sse.sqrt.p")) {
2342 Rep =
2343 Builder.CreateCall(Intrinsic::getDeclaration(
2344 F->getParent(), Intrinsic::sqrt, CI->getType()),
2345 {CI->getArgOperand(0)});
2346 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2347 if (CI->arg_size() == 4 &&
2348 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2349 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2350 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2351 : Intrinsic::x86_avx512_sqrt_pd_512;
2352
2353 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2354 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
2355 Args);
2356 } else {
2357 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2358 Intrinsic::sqrt,
2359 CI->getType()),
2360 {CI->getArgOperand(0)});
2361 }
2362 Rep =
2363 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2364 } else if (Name.starts_with("avx512.ptestm") ||
2365 Name.starts_with("avx512.ptestnm")) {
2366 Value *Op0 = CI->getArgOperand(0);
2367 Value *Op1 = CI->getArgOperand(1);
2368 Value *Mask = CI->getArgOperand(2);
2369 Rep = Builder.CreateAnd(Op0, Op1);
2370 llvm::Type *Ty = Op0->getType();
2371 Value *Zero = llvm::Constant::getNullValue(Ty);
2372 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2373 ? ICmpInst::ICMP_NE
2374 : ICmpInst::ICMP_EQ;
2375 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2376 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2377 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2378 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2379 ->getNumElements();
2380 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2381 Rep =
2382 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2383 } else if (Name.starts_with("avx512.kunpck")) {
2384 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2385 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2386 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2387 int Indices[64];
2388 for (unsigned i = 0; i != NumElts; ++i)
2389 Indices[i] = i;
2390
2391 // First extract half of each vector. This gives better codegen than
2392 // doing it in a single shuffle.
2393 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2394 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2395 // Concat the vectors.
2396 // NOTE: Operands have to be swapped to match intrinsic definition.
2397 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2398 Rep = Builder.CreateBitCast(Rep, CI->getType());
2399 } else if (Name == "avx512.kand.w") {
2400 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2401 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2402 Rep = Builder.CreateAnd(LHS, RHS);
2403 Rep = Builder.CreateBitCast(Rep, CI->getType());
2404 } else if (Name == "avx512.kandn.w") {
2405 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2406 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2407 LHS = Builder.CreateNot(LHS);
2408 Rep = Builder.CreateAnd(LHS, RHS);
2409 Rep = Builder.CreateBitCast(Rep, CI->getType());
2410 } else if (Name == "avx512.kor.w") {
2411 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2412 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2413 Rep = Builder.CreateOr(LHS, RHS);
2414 Rep = Builder.CreateBitCast(Rep, CI->getType());
2415 } else if (Name == "avx512.kxor.w") {
2416 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2417 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2418 Rep = Builder.CreateXor(LHS, RHS);
2419 Rep = Builder.CreateBitCast(Rep, CI->getType());
2420 } else if (Name == "avx512.kxnor.w") {
2421 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2422 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2423 LHS = Builder.CreateNot(LHS);
2424 Rep = Builder.CreateXor(LHS, RHS);
2425 Rep = Builder.CreateBitCast(Rep, CI->getType());
2426 } else if (Name == "avx512.knot.w") {
2427 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2428 Rep = Builder.CreateNot(Rep);
2429 Rep = Builder.CreateBitCast(Rep, CI->getType());
2430 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2431 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2432 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2433 Rep = Builder.CreateOr(LHS, RHS);
2434 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2435 Value *C;
2436 if (Name[14] == 'c')
2437 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2438 else
2439 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2440 Rep = Builder.CreateICmpEQ(Rep, C);
2441 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2442 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2443 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2444 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2445 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2446 Type *I32Ty = Type::getInt32Ty(C);
2447 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2448 ConstantInt::get(I32Ty, 0));
2449 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2450 ConstantInt::get(I32Ty, 0));
2451 Value *EltOp;
2452 if (Name.contains(".add."))
2453 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2454 else if (Name.contains(".sub."))
2455 EltOp = Builder.CreateFSub(Elt0, Elt1);
2456 else if (Name.contains(".mul."))
2457 EltOp = Builder.CreateFMul(Elt0, Elt1);
2458 else
2459 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2460 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2461 ConstantInt::get(I32Ty, 0));
2462 } else if (Name.starts_with("avx512.mask.pcmp")) {
2463 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2464 bool CmpEq = Name[16] == 'e';
2465 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2466 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2467 Type *OpTy = CI->getArgOperand(0)->getType();
2468 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2469 Intrinsic::ID IID;
2470 switch (VecWidth) {
2471 default:
2472 llvm_unreachable("Unexpected intrinsic");
2473 case 128:
2474 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2475 break;
2476 case 256:
2477 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2478 break;
2479 case 512:
2480 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2481 break;
2482 }
2483
2484 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2485 {CI->getOperand(0), CI->getArgOperand(1)});
2486 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2487 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2488 Type *OpTy = CI->getArgOperand(0)->getType();
2489 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2490 unsigned EltWidth = OpTy->getScalarSizeInBits();
2491 Intrinsic::ID IID;
2492 if (VecWidth == 128 && EltWidth == 32)
2493 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2494 else if (VecWidth == 256 && EltWidth == 32)
2495 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2496 else if (VecWidth == 512 && EltWidth == 32)
2497 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2498 else if (VecWidth == 128 && EltWidth == 64)
2499 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2500 else if (VecWidth == 256 && EltWidth == 64)
2501 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2502 else if (VecWidth == 512 && EltWidth == 64)
2503 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2504 else
2505 llvm_unreachable("Unexpected intrinsic");
2506
2507 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2508 {CI->getOperand(0), CI->getArgOperand(1)});
2509 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2510 } else if (Name.starts_with("avx512.cmp.p")) {
2511 SmallVector<Value *, 4> Args(CI->args());
2512 Type *OpTy = Args[0]->getType();
2513 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2514 unsigned EltWidth = OpTy->getScalarSizeInBits();
2515 Intrinsic::ID IID;
2516 if (VecWidth == 128 && EltWidth == 32)
2517 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2518 else if (VecWidth == 256 && EltWidth == 32)
2519 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2520 else if (VecWidth == 512 && EltWidth == 32)
2521 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2522 else if (VecWidth == 128 && EltWidth == 64)
2523 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2524 else if (VecWidth == 256 && EltWidth == 64)
2525 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2526 else if (VecWidth == 512 && EltWidth == 64)
2527 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2528 else
2529 llvm_unreachable("Unexpected intrinsic");
2530
2531 Value *Mask = Constant::getAllOnesValue(CI->getType());
2532 if (VecWidth == 512)
2533 std::swap(Mask, Args.back());
2534 Args.push_back(Mask);
2535
2536 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2537 Args);
2538 } else if (Name.starts_with("avx512.mask.cmp.")) {
2539 // Integer compare intrinsics.
2540 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2541 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2542 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2543 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2544 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2545 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2546 Name.starts_with("avx512.cvtw2mask.") ||
2547 Name.starts_with("avx512.cvtd2mask.") ||
2548 Name.starts_with("avx512.cvtq2mask.")) {
2549 Value *Op = CI->getArgOperand(0);
2550 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2551 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2552 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2553 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2554 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2555 Name.starts_with("avx512.mask.pabs")) {
2556 Rep = upgradeAbs(Builder, *CI);
2557 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2558 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2559 Name.starts_with("avx512.mask.pmaxs")) {
2560 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2561 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2562 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2563 Name.starts_with("avx512.mask.pmaxu")) {
2564 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2565 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2566 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2567 Name.starts_with("avx512.mask.pmins")) {
2568 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2569 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2570 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2571 Name.starts_with("avx512.mask.pminu")) {
2572 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2573 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2574 Name == "avx512.pmulu.dq.512" ||
2575 Name.starts_with("avx512.mask.pmulu.dq.")) {
2576 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2577 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2578 Name == "avx512.pmul.dq.512" ||
2579 Name.starts_with("avx512.mask.pmul.dq.")) {
2580 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2581 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2582 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2583 Rep =
2584 Builder.CreateSIToFP(CI->getArgOperand(1),
2585 cast<VectorType>(CI->getType())->getElementType());
2586 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2587 } else if (Name == "avx512.cvtusi2sd") {
2588 Rep =
2589 Builder.CreateUIToFP(CI->getArgOperand(1),
2590 cast<VectorType>(CI->getType())->getElementType());
2591 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2592 } else if (Name == "sse2.cvtss2sd") {
2593 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2594 Rep = Builder.CreateFPExt(
2595 Rep, cast<VectorType>(CI->getType())->getElementType());
2596 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2597 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2598 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2599 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2600 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2601 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2602 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2603 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2604 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2605 Name == "avx512.mask.cvtqq2ps.256" ||
2606 Name == "avx512.mask.cvtqq2ps.512" ||
2607 Name == "avx512.mask.cvtuqq2ps.256" ||
2608 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2609 Name == "avx.cvt.ps2.pd.256" ||
2610 Name == "avx512.mask.cvtps2pd.128" ||
2611 Name == "avx512.mask.cvtps2pd.256") {
2612 auto *DstTy = cast<FixedVectorType>(CI->getType());
2613 Rep = CI->getArgOperand(0);
2614 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2615
2616 unsigned NumDstElts = DstTy->getNumElements();
2617 if (NumDstElts < SrcTy->getNumElements()) {
2618 assert(NumDstElts == 2 && "Unexpected vector size");
2619 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2620 }
2621
2622 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2623 bool IsUnsigned = Name.contains("cvtu");
2624 if (IsPS2PD)
2625 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2626 else if (CI->arg_size() == 4 &&
2627 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2628 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2629 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2630 : Intrinsic::x86_avx512_sitofp_round;
2631 Function *F =
2632 Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});
2633 Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});
2634 } else {
2635 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2636 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2637 }
2638
2639 if (CI->arg_size() >= 3)
2640 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2641 CI->getArgOperand(1));
2642 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2643 Name.starts_with("vcvtph2ps.")) {
2644 auto *DstTy = cast<FixedVectorType>(CI->getType());
2645 Rep = CI->getArgOperand(0);
2646 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2647 unsigned NumDstElts = DstTy->getNumElements();
2648 if (NumDstElts != SrcTy->getNumElements()) {
2649 assert(NumDstElts == 4 && "Unexpected vector size");
2650 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2651 }
2652 Rep = Builder.CreateBitCast(
2653 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2654 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2655 if (CI->arg_size() >= 3)
2656 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2657 CI->getArgOperand(1));
2658 } else if (Name.starts_with("avx512.mask.load")) {
2659 // "avx512.mask.loadu." or "avx512.mask.load."
2660 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2661 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2662 CI->getArgOperand(2), Aligned);
2663 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2664 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2665 Type *PtrTy = ResultTy->getElementType();
2666
2667 // Cast the pointer to element type.
2668 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2669 llvm::PointerType::getUnqual(PtrTy));
2670
2671 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2672 ResultTy->getNumElements());
2673
2674 Function *ELd = Intrinsic::getDeclaration(
2675 F->getParent(), Intrinsic::masked_expandload, ResultTy);
2676 Rep = Builder.CreateCall(ELd, {Ptr, MaskVec, CI->getOperand(1)});
2677 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2678 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2679 Type *PtrTy = ResultTy->getElementType();
2680
2681 // Cast the pointer to element type.
2682 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2683 llvm::PointerType::getUnqual(PtrTy));
2684
2685 Value *MaskVec =
2686 getX86MaskVec(Builder, CI->getArgOperand(2),
2687 cast<FixedVectorType>(ResultTy)->getNumElements());
2688
2689 Function *CSt = Intrinsic::getDeclaration(
2690 F->getParent(), Intrinsic::masked_compressstore, ResultTy);
2691 Rep = Builder.CreateCall(CSt, {CI->getArgOperand(1), Ptr, MaskVec});
2692 } else if (Name.starts_with("avx512.mask.compress.") ||
2693 Name.starts_with("avx512.mask.expand.")) {
2694 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2695
2696 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2697 ResultTy->getNumElements());
2698
2699 bool IsCompress = Name[12] == 'c';
2700 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2701 : Intrinsic::x86_avx512_mask_expand;
2702 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2703 Rep = Builder.CreateCall(Intr,
2704 {CI->getOperand(0), CI->getOperand(1), MaskVec});
2705 } else if (Name.starts_with("xop.vpcom")) {
2706 bool IsSigned;
2707 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2708 Name.ends_with("uq"))
2709 IsSigned = false;
2710 else if (Name.ends_with("b") || Name.ends_with("w") ||
2711 Name.ends_with("d") || Name.ends_with("q"))
2712 IsSigned = true;
2713 else
2714 llvm_unreachable("Unknown suffix");
2715
2716 unsigned Imm;
2717 if (CI->arg_size() == 3) {
2718 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2719 } else {
2720 Name = Name.substr(9); // strip off "xop.vpcom"
2721 if (Name.starts_with("lt"))
2722 Imm = 0;
2723 else if (Name.starts_with("le"))
2724 Imm = 1;
2725 else if (Name.starts_with("gt"))
2726 Imm = 2;
2727 else if (Name.starts_with("ge"))
2728 Imm = 3;
2729 else if (Name.starts_with("eq"))
2730 Imm = 4;
2731 else if (Name.starts_with("ne"))
2732 Imm = 5;
2733 else if (Name.starts_with("false"))
2734 Imm = 6;
2735 else if (Name.starts_with("true"))
2736 Imm = 7;
2737 else
2738 llvm_unreachable("Unknown condition");
2739 }
2740
2741 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2742 } else if (Name.starts_with("xop.vpcmov")) {
2743 Value *Sel = CI->getArgOperand(2);
2744 Value *NotSel = Builder.CreateNot(Sel);
2745 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2746 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2747 Rep = Builder.CreateOr(Sel0, Sel1);
2748 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2749 Name.starts_with("avx512.mask.prol")) {
2750 Rep = upgradeX86Rotate(Builder, *CI, false);
2751 } else if (Name.starts_with("avx512.pror") ||
2752 Name.starts_with("avx512.mask.pror")) {
2753 Rep = upgradeX86Rotate(Builder, *CI, true);
2754 } else if (Name.starts_with("avx512.vpshld.") ||
2755 Name.starts_with("avx512.mask.vpshld") ||
2756 Name.starts_with("avx512.maskz.vpshld")) {
2757 bool ZeroMask = Name[11] == 'z';
2758 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2759 } else if (Name.starts_with("avx512.vpshrd.") ||
2760 Name.starts_with("avx512.mask.vpshrd") ||
2761 Name.starts_with("avx512.maskz.vpshrd")) {
2762 bool ZeroMask = Name[11] == 'z';
2763 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2764 } else if (Name == "sse42.crc32.64.8") {
2765 Function *CRC32 = Intrinsic::getDeclaration(
2766 F->getParent(), Intrinsic::x86_sse42_crc32_32_8);
2767 Value *Trunc0 =
2768 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2769 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2770 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2771 } else if (Name.starts_with("avx.vbroadcast.s") ||
2772 Name.starts_with("avx512.vbroadcast.s")) {
2773 // Replace broadcasts with a series of insertelements.
2774 auto *VecTy = cast<FixedVectorType>(CI->getType());
2775 Type *EltTy = VecTy->getElementType();
2776 unsigned EltNum = VecTy->getNumElements();
2777 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2778 Type *I32Ty = Type::getInt32Ty(C);
2779 Rep = PoisonValue::get(VecTy);
2780 for (unsigned I = 0; I < EltNum; ++I)
2781 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2782 } else if (Name.starts_with("sse41.pmovsx") ||
2783 Name.starts_with("sse41.pmovzx") ||
2784 Name.starts_with("avx2.pmovsx") ||
2785 Name.starts_with("avx2.pmovzx") ||
2786 Name.starts_with("avx512.mask.pmovsx") ||
2787 Name.starts_with("avx512.mask.pmovzx")) {
2788 auto *DstTy = cast<FixedVectorType>(CI->getType());
2789 unsigned NumDstElts = DstTy->getNumElements();
2790
2791 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2792 SmallVector<int, 8> ShuffleMask(NumDstElts);
2793 for (unsigned i = 0; i != NumDstElts; ++i)
2794 ShuffleMask[i] = i;
2795
2796 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2797
2798 bool DoSext = Name.contains("pmovsx");
2799 Rep =
2800 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2801 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2802 if (CI->arg_size() == 3)
2803 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2804 CI->getArgOperand(1));
2805 } else if (Name == "avx512.mask.pmov.qd.256" ||
2806 Name == "avx512.mask.pmov.qd.512" ||
2807 Name == "avx512.mask.pmov.wb.256" ||
2808 Name == "avx512.mask.pmov.wb.512") {
2809 Type *Ty = CI->getArgOperand(1)->getType();
2810 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2811 Rep =
2812 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2813 } else if (Name.starts_with("avx.vbroadcastf128") ||
2814 Name == "avx2.vbroadcasti128") {
2815 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2816 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2817 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2818 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2819 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2820 PointerType::getUnqual(VT));
2821 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2822 if (NumSrcElts == 2)
2823 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2824 else
2825 Rep = Builder.CreateShuffleVector(Load,
2826 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2827 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2828 Name.starts_with("avx512.mask.shuf.f")) {
2829 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2830 Type *VT = CI->getType();
2831 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2832 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2833 unsigned ControlBitsMask = NumLanes - 1;
2834 unsigned NumControlBits = NumLanes / 2;
2835 SmallVector<int, 8> ShuffleMask(0);
2836
2837 for (unsigned l = 0; l != NumLanes; ++l) {
2838 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2839 // We actually need the other source.
2840 if (l >= NumLanes / 2)
2841 LaneMask += NumLanes;
2842 for (unsigned i = 0; i != NumElementsInLane; ++i)
2843 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2844 }
2845 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2846 CI->getArgOperand(1), ShuffleMask);
2847 Rep =
2848 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2849 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2850 Name.starts_with("avx512.mask.broadcasti")) {
2851 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2852 ->getNumElements();
2853 unsigned NumDstElts =
2854 cast<FixedVectorType>(CI->getType())->getNumElements();
2855
2856 SmallVector<int, 8> ShuffleMask(NumDstElts);
2857 for (unsigned i = 0; i != NumDstElts; ++i)
2858 ShuffleMask[i] = i % NumSrcElts;
2859
2860 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2861 CI->getArgOperand(0), ShuffleMask);
2862 Rep =
2863 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2864 } else if (Name.starts_with("avx2.pbroadcast") ||
2865 Name.starts_with("avx2.vbroadcast") ||
2866 Name.starts_with("avx512.pbroadcast") ||
2867 Name.starts_with("avx512.mask.broadcast.s")) {
2868 // Replace vp?broadcasts with a vector shuffle.
2869 Value *Op = CI->getArgOperand(0);
2870 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2871 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2872 SmallVector<int, 8> M;
2873 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2874 Rep = Builder.CreateShuffleVector(Op, M);
2875
2876 if (CI->arg_size() == 3)
2877 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2878 CI->getArgOperand(1));
2879 } else if (Name.starts_with("sse2.padds.") ||
2880 Name.starts_with("avx2.padds.") ||
2881 Name.starts_with("avx512.padds.") ||
2882 Name.starts_with("avx512.mask.padds.")) {
2883 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2884 } else if (Name.starts_with("sse2.psubs.") ||
2885 Name.starts_with("avx2.psubs.") ||
2886 Name.starts_with("avx512.psubs.") ||
2887 Name.starts_with("avx512.mask.psubs.")) {
2888 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2889 } else if (Name.starts_with("sse2.paddus.") ||
2890 Name.starts_with("avx2.paddus.") ||
2891 Name.starts_with("avx512.mask.paddus.")) {
2892 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2893 } else if (Name.starts_with("sse2.psubus.") ||
2894 Name.starts_with("avx2.psubus.") ||
2895 Name.starts_with("avx512.mask.psubus.")) {
2896 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2897 } else if (Name.starts_with("avx512.mask.palignr.")) {
2898 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2899 CI->getArgOperand(1), CI->getArgOperand(2),
2900 CI->getArgOperand(3), CI->getArgOperand(4),
2901 false);
2902 } else if (Name.starts_with("avx512.mask.valign.")) {
2903 Rep = upgradeX86ALIGNIntrinsics(
2904 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2905 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
2906 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
2907 // 128/256-bit shift left specified in bits.
2908 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2909 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2910 Shift / 8); // Shift is in bits.
2911 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
2912 // 128/256-bit shift right specified in bits.
2913 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2914 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2915 Shift / 8); // Shift is in bits.
2916 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
2917 Name == "avx512.psll.dq.512") {
2918 // 128/256/512-bit shift left specified in bytes.
2919 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2920 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2921 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
2922 Name == "avx512.psrl.dq.512") {
2923 // 128/256/512-bit shift right specified in bytes.
2924 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2925 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2926 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
2927 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
2928 Name.starts_with("avx2.pblendd.")) {
2929 Value *Op0 = CI->getArgOperand(0);
2930 Value *Op1 = CI->getArgOperand(1);
2931 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2932 auto *VecTy = cast<FixedVectorType>(CI->getType());
2933 unsigned NumElts = VecTy->getNumElements();
2934
2935 SmallVector<int, 16> Idxs(NumElts);
2936 for (unsigned i = 0; i != NumElts; ++i)
2937 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
2938
2939 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2940 } else if (Name.starts_with("avx.vinsertf128.") ||
2941 Name == "avx2.vinserti128" ||
2942 Name.starts_with("avx512.mask.insert")) {
2943 Value *Op0 = CI->getArgOperand(0);
2944 Value *Op1 = CI->getArgOperand(1);
2945 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2946 unsigned DstNumElts =
2947 cast<FixedVectorType>(CI->getType())->getNumElements();
2948 unsigned SrcNumElts =
2949 cast<FixedVectorType>(Op1->getType())->getNumElements();
2950 unsigned Scale = DstNumElts / SrcNumElts;
2951
2952 // Mask off the high bits of the immediate value; hardware ignores those.
2953 Imm = Imm % Scale;
2954
2955 // Extend the second operand into a vector the size of the destination.
2956 SmallVector<int, 8> Idxs(DstNumElts);
2957 for (unsigned i = 0; i != SrcNumElts; ++i)
2958 Idxs[i] = i;
2959 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2960 Idxs[i] = SrcNumElts;
2961 Rep = Builder.CreateShuffleVector(Op1, Idxs);
2962
2963 // Insert the second operand into the first operand.
2964
2965 // Note that there is no guarantee that instruction lowering will actually
2966 // produce a vinsertf128 instruction for the created shuffles. In
2967 // particular, the 0 immediate case involves no lane changes, so it can
2968 // be handled as a blend.
2969
2970 // Example of shuffle mask for 32-bit elements:
2971 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2972 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2973
2974 // First fill with identity mask.
2975 for (unsigned i = 0; i != DstNumElts; ++i)
2976 Idxs[i] = i;
2977 // Then replace the elements where we need to insert.
2978 for (unsigned i = 0; i != SrcNumElts; ++i)
2979 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2980 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2981
2982 // If the intrinsic has a mask operand, handle that.
2983 if (CI->arg_size() == 5)
2984 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
2985 CI->getArgOperand(3));
2986 } else if (Name.starts_with("avx.vextractf128.") ||
2987 Name == "avx2.vextracti128" ||
2988 Name.starts_with("avx512.mask.vextract")) {
2989 Value *Op0 = CI->getArgOperand(0);
2990 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2991 unsigned DstNumElts =
2992 cast<FixedVectorType>(CI->getType())->getNumElements();
2993 unsigned SrcNumElts =
2994 cast<FixedVectorType>(Op0->getType())->getNumElements();
2995 unsigned Scale = SrcNumElts / DstNumElts;
2996
2997 // Mask off the high bits of the immediate value; hardware ignores those.
2998 Imm = Imm % Scale;
2999
3000 // Get indexes for the subvector of the input vector.
3001 SmallVector<int, 8> Idxs(DstNumElts);
3002 for (unsigned i = 0; i != DstNumElts; ++i) {
3003 Idxs[i] = i + (Imm * DstNumElts);
3004 }
3005 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3006
3007 // If the intrinsic has a mask operand, handle that.
3008 if (CI->arg_size() == 4)
3009 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3010 CI->getArgOperand(2));
3011 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3012 Name.starts_with("avx512.mask.perm.di.")) {
3013 Value *Op0 = CI->getArgOperand(0);
3014 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3015 auto *VecTy = cast<FixedVectorType>(CI->getType());
3016 unsigned NumElts = VecTy->getNumElements();
3017
3018 SmallVector<int, 8> Idxs(NumElts);
3019 for (unsigned i = 0; i != NumElts; ++i)
3020 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3021
3022 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3023
3024 if (CI->arg_size() == 4)
3025 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3026 CI->getArgOperand(2));
3027 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3028 // The immediate permute control byte looks like this:
3029 // [1:0] - select 128 bits from sources for low half of destination
3030 // [2] - ignore
3031 // [3] - zero low half of destination
3032 // [5:4] - select 128 bits from sources for high half of destination
3033 // [6] - ignore
3034 // [7] - zero high half of destination
3035
3036 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3037
3038 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3039 unsigned HalfSize = NumElts / 2;
3040 SmallVector<int, 8> ShuffleMask(NumElts);
3041
3042 // Determine which operand(s) are actually in use for this instruction.
3043 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3044 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3045
3046 // If needed, replace operands based on zero mask.
3047 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3048 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3049
3050 // Permute low half of result.
3051 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3052 for (unsigned i = 0; i < HalfSize; ++i)
3053 ShuffleMask[i] = StartIndex + i;
3054
3055 // Permute high half of result.
3056 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3057 for (unsigned i = 0; i < HalfSize; ++i)
3058 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3059
3060 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3061
3062 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3063 Name.starts_with("avx512.mask.vpermil.p") ||
3064 Name.starts_with("avx512.mask.pshuf.d.")) {
3065 Value *Op0 = CI->getArgOperand(0);
3066 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3067 auto *VecTy = cast<FixedVectorType>(CI->getType());
3068 unsigned NumElts = VecTy->getNumElements();
3069 // Calculate the size of each index in the immediate.
3070 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3071 unsigned IdxMask = ((1 << IdxSize) - 1);
3072
3073 SmallVector<int, 8> Idxs(NumElts);
3074 // Look up the bits for this element, wrapping around the immediate every
3075 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3076 // to offset by the first index of each group.
3077 for (unsigned i = 0; i != NumElts; ++i)
3078 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3079
3080 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3081
3082 if (CI->arg_size() == 4)
3083 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3084 CI->getArgOperand(2));
3085 } else if (Name == "sse2.pshufl.w" ||
3086 Name.starts_with("avx512.mask.pshufl.w.")) {
3087 Value *Op0 = CI->getArgOperand(0);
3088 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3089 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3090
3091 SmallVector<int, 16> Idxs(NumElts);
3092 for (unsigned l = 0; l != NumElts; l += 8) {
3093 for (unsigned i = 0; i != 4; ++i)
3094 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3095 for (unsigned i = 4; i != 8; ++i)
3096 Idxs[i + l] = i + l;
3097 }
3098
3099 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3100
3101 if (CI->arg_size() == 4)
3102 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3103 CI->getArgOperand(2));
3104 } else if (Name == "sse2.pshufh.w" ||
3105 Name.starts_with("avx512.mask.pshufh.w.")) {
3106 Value *Op0 = CI->getArgOperand(0);
3107 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3108 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3109
3110 SmallVector<int, 16> Idxs(NumElts);
3111 for (unsigned l = 0; l != NumElts; l += 8) {
3112 for (unsigned i = 0; i != 4; ++i)
3113 Idxs[i + l] = i + l;
3114 for (unsigned i = 0; i != 4; ++i)
3115 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3116 }
3117
3118 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3119
3120 if (CI->arg_size() == 4)
3121 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3122 CI->getArgOperand(2));
3123 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3124 Value *Op0 = CI->getArgOperand(0);
3125 Value *Op1 = CI->getArgOperand(1);
3126 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3128
3129 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3130 unsigned HalfLaneElts = NumLaneElts / 2;
3131
3132 SmallVector<int, 16> Idxs(NumElts);
3133 for (unsigned i = 0; i != NumElts; ++i) {
3134 // Base index is the starting element of the lane.
3135 Idxs[i] = i - (i % NumLaneElts);
3136 // If we are half way through the lane switch to the other source.
3137 if ((i % NumLaneElts) >= HalfLaneElts)
3138 Idxs[i] += NumElts;
3139 // Now select the specific element by adding HalfLaneElts bits from the
3140 // immediate, wrapping around the immediate every 8 bits.
3141 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3142 }
3143
3144 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3145
3146 Rep =
3147 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3148 } else if (Name.starts_with("avx512.mask.movddup") ||
3149 Name.starts_with("avx512.mask.movshdup") ||
3150 Name.starts_with("avx512.mask.movsldup")) {
3151 Value *Op0 = CI->getArgOperand(0);
3152 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3153 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3154
3155 unsigned Offset = 0;
3156 if (Name.starts_with("avx512.mask.movshdup."))
3157 Offset = 1;
3158
3159 SmallVector<int, 16> Idxs(NumElts);
3160 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3161 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3162 Idxs[i + l + 0] = i + l + Offset;
3163 Idxs[i + l + 1] = i + l + Offset;
3164 }
3165
3166 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3167
3168 Rep =
3169 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3170 } else if (Name.starts_with("avx512.mask.punpckl") ||
3171 Name.starts_with("avx512.mask.unpckl.")) {
3172 Value *Op0 = CI->getArgOperand(0);
3173 Value *Op1 = CI->getArgOperand(1);
3174 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3175 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3176
3177 SmallVector<int, 64> Idxs(NumElts);
3178 for (int l = 0; l != NumElts; l += NumLaneElts)
3179 for (int i = 0; i != NumLaneElts; ++i)
3180 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3181
3182 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3183
3184 Rep =
3185 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3186 } else if (Name.starts_with("avx512.mask.punpckh") ||
3187 Name.starts_with("avx512.mask.unpckh.")) {
3188 Value *Op0 = CI->getArgOperand(0);
3189 Value *Op1 = CI->getArgOperand(1);
3190 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3191 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3192
3193 SmallVector<int, 64> Idxs(NumElts);
3194 for (int l = 0; l != NumElts; l += NumLaneElts)
3195 for (int i = 0; i != NumLaneElts; ++i)
3196 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3197
3198 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3199
3200 Rep =
3201 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3202 } else if (Name.starts_with("avx512.mask.and.") ||
3203 Name.starts_with("avx512.mask.pand.")) {
3204 VectorType *FTy = cast<VectorType>(CI->getType());
3205 VectorType *ITy = VectorType::getInteger(FTy);
3206 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3207 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3208 Rep = Builder.CreateBitCast(Rep, FTy);
3209 Rep =
3210 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3211 } else if (Name.starts_with("avx512.mask.andn.") ||
3212 Name.starts_with("avx512.mask.pandn.")) {
3213 VectorType *FTy = cast<VectorType>(CI->getType());
3214 VectorType *ITy = VectorType::getInteger(FTy);
3215 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3216 Rep = Builder.CreateAnd(Rep,
3217 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3218 Rep = Builder.CreateBitCast(Rep, FTy);
3219 Rep =
3220 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3221 } else if (Name.starts_with("avx512.mask.or.") ||
3222 Name.starts_with("avx512.mask.por.")) {
3223 VectorType *FTy = cast<VectorType>(CI->getType());
3224 VectorType *ITy = VectorType::getInteger(FTy);
3225 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3226 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3227 Rep = Builder.CreateBitCast(Rep, FTy);
3228 Rep =
3229 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3230 } else if (Name.starts_with("avx512.mask.xor.") ||
3231 Name.starts_with("avx512.mask.pxor.")) {
3232 VectorType *FTy = cast<VectorType>(CI->getType());
3233 VectorType *ITy = VectorType::getInteger(FTy);
3234 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3235 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3236 Rep = Builder.CreateBitCast(Rep, FTy);
3237 Rep =
3238 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3239 } else if (Name.starts_with("avx512.mask.padd.")) {
3240 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3241 Rep =
3242 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3243 } else if (Name.starts_with("avx512.mask.psub.")) {
3244 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3245 Rep =
3246 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3247 } else if (Name.starts_with("avx512.mask.pmull.")) {
3248 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3249 Rep =
3250 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3251 } else if (Name.starts_with("avx512.mask.add.p")) {
3252 if (Name.ends_with(".512")) {
3253 Intrinsic::ID IID;
3254 if (Name[17] == 's')
3255 IID = Intrinsic::x86_avx512_add_ps_512;
3256 else
3257 IID = Intrinsic::x86_avx512_add_pd_512;
3258
3259 Rep = Builder.CreateCall(
3260 Intrinsic::getDeclaration(F->getParent(), IID),
3261 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3262 } else {
3263 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3264 }
3265 Rep =
3266 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3267 } else if (Name.starts_with("avx512.mask.div.p")) {
3268 if (Name.ends_with(".512")) {
3269 Intrinsic::ID IID;
3270 if (Name[17] == 's')
3271 IID = Intrinsic::x86_avx512_div_ps_512;
3272 else
3273 IID = Intrinsic::x86_avx512_div_pd_512;
3274
3275 Rep = Builder.CreateCall(
3276 Intrinsic::getDeclaration(F->getParent(), IID),
3277 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3278 } else {
3279 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3280 }
3281 Rep =
3282 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3283 } else if (Name.starts_with("avx512.mask.mul.p")) {
3284 if (Name.ends_with(".512")) {
3285 Intrinsic::ID IID;
3286 if (Name[17] == 's')
3287 IID = Intrinsic::x86_avx512_mul_ps_512;
3288 else
3289 IID = Intrinsic::x86_avx512_mul_pd_512;
3290
3291 Rep = Builder.CreateCall(
3292 Intrinsic::getDeclaration(F->getParent(), IID),
3293 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3294 } else {
3295 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3296 }
3297 Rep =
3298 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3299 } else if (Name.starts_with("avx512.mask.sub.p")) {
3300 if (Name.ends_with(".512")) {
3301 Intrinsic::ID IID;
3302 if (Name[17] == 's')
3303 IID = Intrinsic::x86_avx512_sub_ps_512;
3304 else
3305 IID = Intrinsic::x86_avx512_sub_pd_512;
3306
3307 Rep = Builder.CreateCall(
3308 Intrinsic::getDeclaration(F->getParent(), IID),
3309 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3310 } else {
3311 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3312 }
3313 Rep =
3314 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3315 } else if ((Name.starts_with("avx512.mask.max.p") ||
3316 Name.starts_with("avx512.mask.min.p")) &&
3317 Name.drop_front(18) == ".512") {
3318 bool IsDouble = Name[17] == 'd';
3319 bool IsMin = Name[13] == 'i';
3320 static const Intrinsic::ID MinMaxTbl[2][2] = {
3321 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3322 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3323 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3324
3325 Rep = Builder.CreateCall(
3326 Intrinsic::getDeclaration(F->getParent(), IID),
3327 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3328 Rep =
3329 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3330 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3331 Rep =
3332 Builder.CreateCall(Intrinsic::getDeclaration(
3333 F->getParent(), Intrinsic::ctlz, CI->getType()),
3334 {CI->getArgOperand(0), Builder.getInt1(false)});
3335 Rep =
3336 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3337 } else if (Name.starts_with("avx512.mask.psll")) {
3338 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3339 bool IsVariable = Name[16] == 'v';
3340 char Size = Name[16] == '.' ? Name[17]
3341 : Name[17] == '.' ? Name[18]
3342 : Name[18] == '.' ? Name[19]
3343 : Name[20];
3344
3345 Intrinsic::ID IID;
3346 if (IsVariable && Name[17] != '.') {
3347 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3348 IID = Intrinsic::x86_avx2_psllv_q;
3349 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3350 IID = Intrinsic::x86_avx2_psllv_q_256;
3351 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3352 IID = Intrinsic::x86_avx2_psllv_d;
3353 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3354 IID = Intrinsic::x86_avx2_psllv_d_256;
3355 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3356 IID = Intrinsic::x86_avx512_psllv_w_128;
3357 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3358 IID = Intrinsic::x86_avx512_psllv_w_256;
3359 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3360 IID = Intrinsic::x86_avx512_psllv_w_512;
3361 else
3362 llvm_unreachable("Unexpected size");
3363 } else if (Name.ends_with(".128")) {
3364 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3365 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3366 : Intrinsic::x86_sse2_psll_d;
3367 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3368 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3369 : Intrinsic::x86_sse2_psll_q;
3370 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3371 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3372 : Intrinsic::x86_sse2_psll_w;
3373 else
3374 llvm_unreachable("Unexpected size");
3375 } else if (Name.ends_with(".256")) {
3376 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3377 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3378 : Intrinsic::x86_avx2_psll_d;
3379 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3380 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3381 : Intrinsic::x86_avx2_psll_q;
3382 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3383 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3384 : Intrinsic::x86_avx2_psll_w;
3385 else
3386 llvm_unreachable("Unexpected size");
3387 } else {
3388 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3389 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3390 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3391 : Intrinsic::x86_avx512_psll_d_512;
3392 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3393 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3394 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3395 : Intrinsic::x86_avx512_psll_q_512;
3396 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3397 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3398 : Intrinsic::x86_avx512_psll_w_512;
3399 else
3400 llvm_unreachable("Unexpected size");
3401 }
3402
3403 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3404 } else if (Name.starts_with("avx512.mask.psrl")) {
3405 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3406 bool IsVariable = Name[16] == 'v';
3407 char Size = Name[16] == '.' ? Name[17]
3408 : Name[17] == '.' ? Name[18]
3409 : Name[18] == '.' ? Name[19]
3410 : Name[20];
3411
3412 Intrinsic::ID IID;
3413 if (IsVariable && Name[17] != '.') {
3414 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3415 IID = Intrinsic::x86_avx2_psrlv_q;
3416 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3417 IID = Intrinsic::x86_avx2_psrlv_q_256;
3418 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3419 IID = Intrinsic::x86_avx2_psrlv_d;
3420 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3421 IID = Intrinsic::x86_avx2_psrlv_d_256;
3422 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3423 IID = Intrinsic::x86_avx512_psrlv_w_128;
3424 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3425 IID = Intrinsic::x86_avx512_psrlv_w_256;
3426 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3427 IID = Intrinsic::x86_avx512_psrlv_w_512;
3428 else
3429 llvm_unreachable("Unexpected size");
3430 } else if (Name.ends_with(".128")) {
3431 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3432 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3433 : Intrinsic::x86_sse2_psrl_d;
3434 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3435 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3436 : Intrinsic::x86_sse2_psrl_q;
3437 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3438 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3439 : Intrinsic::x86_sse2_psrl_w;
3440 else
3441 llvm_unreachable("Unexpected size");
3442 } else if (Name.ends_with(".256")) {
3443 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3444 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3445 : Intrinsic::x86_avx2_psrl_d;
3446 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3447 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3448 : Intrinsic::x86_avx2_psrl_q;
3449 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3450 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3451 : Intrinsic::x86_avx2_psrl_w;
3452 else
3453 llvm_unreachable("Unexpected size");
3454 } else {
3455 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3456 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3457 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3458 : Intrinsic::x86_avx512_psrl_d_512;
3459 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3460 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3461 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3462 : Intrinsic::x86_avx512_psrl_q_512;
3463 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3464 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3465 : Intrinsic::x86_avx512_psrl_w_512;
3466 else
3467 llvm_unreachable("Unexpected size");
3468 }
3469
3470 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3471 } else if (Name.starts_with("avx512.mask.psra")) {
3472 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3473 bool IsVariable = Name[16] == 'v';
3474 char Size = Name[16] == '.' ? Name[17]
3475 : Name[17] == '.' ? Name[18]
3476 : Name[18] == '.' ? Name[19]
3477 : Name[20];
3478
3479 Intrinsic::ID IID;
3480 if (IsVariable && Name[17] != '.') {
3481 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3482 IID = Intrinsic::x86_avx2_psrav_d;
3483 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3484 IID = Intrinsic::x86_avx2_psrav_d_256;
3485 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3486 IID = Intrinsic::x86_avx512_psrav_w_128;
3487 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3488 IID = Intrinsic::x86_avx512_psrav_w_256;
3489 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3490 IID = Intrinsic::x86_avx512_psrav_w_512;
3491 else
3492 llvm_unreachable("Unexpected size");
3493 } else if (Name.ends_with(".128")) {
3494 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3495 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3496 : Intrinsic::x86_sse2_psra_d;
3497 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3498 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3499 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3500 : Intrinsic::x86_avx512_psra_q_128;
3501 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3502 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3503 : Intrinsic::x86_sse2_psra_w;
3504 else
3505 llvm_unreachable("Unexpected size");
3506 } else if (Name.ends_with(".256")) {
3507 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3508 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3509 : Intrinsic::x86_avx2_psra_d;
3510 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3511 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3512 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3513 : Intrinsic::x86_avx512_psra_q_256;
3514 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3515 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3516 : Intrinsic::x86_avx2_psra_w;
3517 else
3518 llvm_unreachable("Unexpected size");
3519 } else {
3520 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3521 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3522 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3523 : Intrinsic::x86_avx512_psra_d_512;
3524 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3525 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3526 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3527 : Intrinsic::x86_avx512_psra_q_512;
3528 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3529 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3530 : Intrinsic::x86_avx512_psra_w_512;
3531 else
3532 llvm_unreachable("Unexpected size");
3533 }
3534
3535 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3536 } else if (Name.starts_with("avx512.mask.move.s")) {
3537 Rep = upgradeMaskedMove(Builder, *CI);
3538 } else if (Name.starts_with("avx512.cvtmask2")) {
3539 Rep = upgradeMaskToInt(Builder, *CI);
3540 } else if (Name.ends_with(".movntdqa")) {
3541 MDNode *Node = MDNode::get(
3542 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3543
3544 Value *Ptr = CI->getArgOperand(0);
3545
3546 // Convert the type of the pointer to a pointer to the stored type.
3547 Value *BC = Builder.CreateBitCast(
3548 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3549 LoadInst *LI = Builder.CreateAlignedLoad(
3550 CI->getType(), BC,
3551 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3552 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3553 Rep = LI;
3554 } else if (Name.starts_with("fma.vfmadd.") ||
3555 Name.starts_with("fma.vfmsub.") ||
3556 Name.starts_with("fma.vfnmadd.") ||
3557 Name.starts_with("fma.vfnmsub.")) {
3558 bool NegMul = Name[6] == 'n';
3559 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3560 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3561
3562 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3563 CI->getArgOperand(2)};
3564
3565 if (IsScalar) {
3566 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3567 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3568 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3569 }
3570
3571 if (NegMul && !IsScalar)
3572 Ops[0] = Builder.CreateFNeg(Ops[0]);
3573 if (NegMul && IsScalar)
3574 Ops[1] = Builder.CreateFNeg(Ops[1]);
3575 if (NegAcc)
3576 Ops[2] = Builder.CreateFNeg(Ops[2]);
3577
3578 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3579 Intrinsic::fma,
3580 Ops[0]->getType()),
3581 Ops);
3582
3583 if (IsScalar)
3584 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3585 } else if (Name.starts_with("fma4.vfmadd.s")) {
3586 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3587 CI->getArgOperand(2)};
3588
3589 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3590 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3591 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3592
3593 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3594 Intrinsic::fma,
3595 Ops[0]->getType()),
3596 Ops);
3597
3598 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3599 Rep, (uint64_t)0);
3600 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3601 Name.starts_with("avx512.maskz.vfmadd.s") ||
3602 Name.starts_with("avx512.mask3.vfmadd.s") ||
3603 Name.starts_with("avx512.mask3.vfmsub.s") ||
3604 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3605 bool IsMask3 = Name[11] == '3';
3606 bool IsMaskZ = Name[11] == 'z';
3607 // Drop the "avx512.mask." (or "mask3."/"maskz.") prefix to make it easier.
3608 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3609 bool NegMul = Name[2] == 'n';
3610 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3611
3612 Value *A = CI->getArgOperand(0);
3613 Value *B = CI->getArgOperand(1);
3614 Value *C = CI->getArgOperand(2);
3615
3616 if (NegMul && (IsMask3 || IsMaskZ))
3617 A = Builder.CreateFNeg(A);
3618 if (NegMul && !(IsMask3 || IsMaskZ))
3619 B = Builder.CreateFNeg(B);
3620 if (NegAcc)
3621 C = Builder.CreateFNeg(C);
3622
3623 A = Builder.CreateExtractElement(A, (uint64_t)0);
3624 B = Builder.CreateExtractElement(B, (uint64_t)0);
3625 C = Builder.CreateExtractElement(C, (uint64_t)0);
3626
3627 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3628 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3629 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3630
3631 Intrinsic::ID IID;
3632 if (Name.back() == 'd')
3633 IID = Intrinsic::x86_avx512_vfmadd_f64;
3634 else
3635 IID = Intrinsic::x86_avx512_vfmadd_f32;
3636 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3637 Rep = Builder.CreateCall(FMA, Ops);
3638 } else {
3639 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3640 A->getType());
3641 Rep = Builder.CreateCall(FMA, {A, B, C});
3642 }
3643
3644 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3645 : IsMask3 ? C
3646 : A;
3647
3648 // For Mask3 with NegAcc, we need to create a new extractelement that
3649 // avoids the negation above.
3650 if (NegAcc && IsMask3)
3651 PassThru =
3652 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3653
3654 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3655 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3656 (uint64_t)0);
3657 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3658 Name.starts_with("avx512.mask.vfnmadd.p") ||
3659 Name.starts_with("avx512.mask.vfnmsub.p") ||
3660 Name.starts_with("avx512.mask3.vfmadd.p") ||
3661 Name.starts_with("avx512.mask3.vfmsub.p") ||
3662 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3663 Name.starts_with("avx512.maskz.vfmadd.p")) {
3664 bool IsMask3 = Name[11] == '3';
3665 bool IsMaskZ = Name[11] == 'z';
3666 // Drop the "avx512.mask." (or "mask3."/"maskz.") prefix to make it easier.
3667 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3668 bool NegMul = Name[2] == 'n';
3669 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3670
3671 Value *A = CI->getArgOperand(0);
3672 Value *B = CI->getArgOperand(1);
3673 Value *C = CI->getArgOperand(2);
3674
3675 if (NegMul && (IsMask3 || IsMaskZ))
3676 A = Builder.CreateFNeg(A);
3677 if (NegMul && !(IsMask3 || IsMaskZ))
3678 B = Builder.CreateFNeg(B);
3679 if (NegAcc)
3680 C = Builder.CreateFNeg(C);
3681
3682 if (CI->arg_size() == 5 &&
3683 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3684 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3685 Intrinsic::ID IID;
3686 // Check the character before ".512" in string.
3687 if (Name[Name.size() - 5] == 's')
3688 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3689 else
3690 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3691
3692 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3693 {A, B, C, CI->getArgOperand(4)});
3694 } else {
3695 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3696 A->getType());
3697 Rep = Builder.CreateCall(FMA, {A, B, C});
3698 }
3699
3700 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3701 : IsMask3 ? CI->getArgOperand(2)
3702 : CI->getArgOperand(0);
3703
3704 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3705 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3706 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3707 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3708 Intrinsic::ID IID;
3709 if (VecWidth == 128 && EltWidth == 32)
3710 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3711 else if (VecWidth == 256 && EltWidth == 32)
3712 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3713 else if (VecWidth == 128 && EltWidth == 64)
3714 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3715 else if (VecWidth == 256 && EltWidth == 64)
3716 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3717 else
3718 llvm_unreachable("Unexpected intrinsic");
3719
3720 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3721 CI->getArgOperand(2)};
3722 Ops[2] = Builder.CreateFNeg(Ops[2]);
3723 Rep =
3724 Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops);
3725 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3726 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3727 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3728 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3729 bool IsMask3 = Name[11] == '3';
3730 bool IsMaskZ = Name[11] == 'z';
3731 // Drop the "avx512.mask." (or "mask3."/"maskz.") prefix to make it easier.
3732 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3733 bool IsSubAdd = Name[3] == 's';
3734 if (CI->arg_size() == 5) {
3735 Intrinsic::ID IID;
3736 // Check the character before ".512" in string.
3737 if (Name[Name.size() - 5] == 's')
3738 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3739 else
3740 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3741
3742 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3743 CI->getArgOperand(2), CI->getArgOperand(4)};
3744 if (IsSubAdd)
3745 Ops[2] = Builder.CreateFNeg(Ops[2]);
3746
3747 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3748 Ops);
3749 } else {
3750 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3751
3752 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3753 CI->getArgOperand(2)};
3754
3755 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3756 Ops[0]->getType());
3757 Value *Odd = Builder.CreateCall(FMA, Ops);
3758 Ops[2] = Builder.CreateFNeg(Ops[2]);
3759 Value *Even = Builder.CreateCall(FMA, Ops);
3760
3761 if (IsSubAdd)
3762 std::swap(Even, Odd);
3763
3764 SmallVector<int, 32> Idxs(NumElts);
3765 for (int i = 0; i != NumElts; ++i)
3766 Idxs[i] = i + (i % 2) * NumElts;
3767
3768 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3769 }
3770
3771 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3772 : IsMask3 ? CI->getArgOperand(2)
3773 : CI->getArgOperand(0);
3774
3775 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3776 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3777 Name.starts_with("avx512.maskz.pternlog.")) {
3778 bool ZeroMask = Name[11] == 'z';
3779 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3780 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3781 Intrinsic::ID IID;
3782 if (VecWidth == 128 && EltWidth == 32)
3783 IID = Intrinsic::x86_avx512_pternlog_d_128;
3784 else if (VecWidth == 256 && EltWidth == 32)
3785 IID = Intrinsic::x86_avx512_pternlog_d_256;
3786 else if (VecWidth == 512 && EltWidth == 32)
3787 IID = Intrinsic::x86_avx512_pternlog_d_512;
3788 else if (VecWidth == 128 && EltWidth == 64)
3789 IID = Intrinsic::x86_avx512_pternlog_q_128;
3790 else if (VecWidth == 256 && EltWidth == 64)
3791 IID = Intrinsic::x86_avx512_pternlog_q_256;
3792 else if (VecWidth == 512 && EltWidth == 64)
3793 IID = Intrinsic::x86_avx512_pternlog_q_512;
3794 else
3795 llvm_unreachable("Unexpected intrinsic");
3796
3797 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3798 CI->getArgOperand(2), CI->getArgOperand(3)};
3799 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3800 Args);
3801 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3802 : CI->getArgOperand(0);
3803 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3804 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3805 Name.starts_with("avx512.maskz.vpmadd52")) {
3806 bool ZeroMask = Name[11] == 'z';
3807 bool High = Name[20] == 'h' || Name[21] == 'h';
3808 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3809 Intrinsic::ID IID;
3810 if (VecWidth == 128 && !High)
3811 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3812 else if (VecWidth == 256 && !High)
3813 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3814 else if (VecWidth == 512 && !High)
3815 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3816 else if (VecWidth == 128 && High)
3817 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3818 else if (VecWidth == 256 && High)
3819 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3820 else if (VecWidth == 512 && High)
3821 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3822 else
3823 llvm_unreachable("Unexpected intrinsic");
3824
3825 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3826 CI->getArgOperand(2)};
3827 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3828 Args);
3829 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3830 : CI->getArgOperand(0);
3831 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3832 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3833 Name.starts_with("avx512.mask.vpermt2var.") ||
3834 Name.starts_with("avx512.maskz.vpermt2var.")) {
3835 bool ZeroMask = Name[11] == 'z';
3836 bool IndexForm = Name[17] == 'i';
3837 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3838 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3839 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3840 Name.starts_with("avx512.mask.vpdpbusds.") ||
3841 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3842 bool ZeroMask = Name[11] == 'z';
3843 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3844 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3845 Intrinsic::ID IID;
3846 if (VecWidth == 128 && !IsSaturating)
3847 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3848 else if (VecWidth == 256 && !IsSaturating)
3849 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3850 else if (VecWidth == 512 && !IsSaturating)
3851 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3852 else if (VecWidth == 128 && IsSaturating)
3853 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3854 else if (VecWidth == 256 && IsSaturating)
3855 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3856 else if (VecWidth == 512 && IsSaturating)
3857 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3858 else
3859 llvm_unreachable("Unexpected intrinsic");
3860
3861 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3862 CI->getArgOperand(2)};
3863 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3864 Args);
3865 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3866 : CI->getArgOperand(0);
3867 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3868 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3869 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3870 Name.starts_with("avx512.mask.vpdpwssds.") ||
3871 Name.starts_with("avx512.maskz.vpdpwssds.")) {
3872 bool ZeroMask = Name[11] == 'z';
3873 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3874 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3875 Intrinsic::ID IID;
3876 if (VecWidth == 128 && !IsSaturating)
3877 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3878 else if (VecWidth == 256 && !IsSaturating)
3879 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3880 else if (VecWidth == 512 && !IsSaturating)
3881 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3882 else if (VecWidth == 128 && IsSaturating)
3883 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3884 else if (VecWidth == 256 && IsSaturating)
3885 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3886 else if (VecWidth == 512 && IsSaturating)
3887 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3888 else
3889 llvm_unreachable("Unexpected intrinsic");
3890
3891 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3892 CI->getArgOperand(2)};
3893 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3894 Args);
3895 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3896 : CI->getArgOperand(0);
3897 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3898 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3899 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3900 Name == "subborrow.u32" || Name == "subborrow.u64") {
3901 Intrinsic::ID IID;
3902 if (Name[0] == 'a' && Name.back() == '2')
3903 IID = Intrinsic::x86_addcarry_32;
3904 else if (Name[0] == 'a' && Name.back() == '4')
3905 IID = Intrinsic::x86_addcarry_64;
3906 else if (Name[0] == 's' && Name.back() == '2')
3907 IID = Intrinsic::x86_subborrow_32;
3908 else if (Name[0] == 's' && Name.back() == '4')
3909 IID = Intrinsic::x86_subborrow_64;
3910 else
3911 llvm_unreachable("Unexpected intrinsic");
3912
3913 // Make a call with 3 operands.
3914 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3915 CI->getArgOperand(2)};
3916 Value *NewCall = Builder.CreateCall(
3917 Intrinsic::getDeclaration(CI->getModule(), IID), Args);
3918
3919 // Extract the second result and store it.
3920 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3921 // Cast the pointer to the right type.
3922 Value *Ptr = Builder.CreateBitCast(
3923 CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
3924 Builder.CreateAlignedStore(Data, Ptr, Align(1));
3925 // Replace the original call result with the first result of the new call.
3926 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3927
3928 CI->replaceAllUsesWith(CF);
3929 Rep = nullptr;
3930 } else if (Name.starts_with("avx512.mask.") &&
3931 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3932 // Rep will be updated by the call in the condition.
3933 }
3934
3935 return Rep;
3936 }
3937
upgradeARMIntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)3938 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
3939 IRBuilder<> &Builder) {
3940 if (Name == "mve.vctp64.old") {
3941 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
3942 // correct type.
3943 Value *VCTP = Builder.CreateCall(
3944 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
3945 CI->getArgOperand(0), CI->getName());
3946 Value *C1 = Builder.CreateCall(
3947 Intrinsic::getDeclaration(
3948 F->getParent(), Intrinsic::arm_mve_pred_v2i,
3949 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
3950 VCTP);
3951 return Builder.CreateCall(
3952 Intrinsic::getDeclaration(
3953 F->getParent(), Intrinsic::arm_mve_pred_i2v,
3954 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
3955 C1);
3956 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
3957 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
3958 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
3959 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
3960 Name ==
3961 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
3962 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
3963 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
3964 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
3965 Name ==
3966 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
3967 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
3968 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
3969 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
3970 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
3971 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
3972 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
3973 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
3974 std::vector<Type *> Tys;
3975 unsigned ID = CI->getIntrinsicID();
3976 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
3977 switch (ID) {
3978 case Intrinsic::arm_mve_mull_int_predicated:
3979 case Intrinsic::arm_mve_vqdmull_predicated:
3980 case Intrinsic::arm_mve_vldr_gather_base_predicated:
3981 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
3982 break;
3983 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
3984 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
3985 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
3986 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
3987 V2I1Ty};
3988 break;
3989 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
3990 Tys = {CI->getType(), CI->getOperand(0)->getType(),
3991 CI->getOperand(1)->getType(), V2I1Ty};
3992 break;
3993 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
3994 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
3995 CI->getOperand(2)->getType(), V2I1Ty};
3996 break;
3997 case Intrinsic::arm_cde_vcx1q_predicated:
3998 case Intrinsic::arm_cde_vcx1qa_predicated:
3999 case Intrinsic::arm_cde_vcx2q_predicated:
4000 case Intrinsic::arm_cde_vcx2qa_predicated:
4001 case Intrinsic::arm_cde_vcx3q_predicated:
4002 case Intrinsic::arm_cde_vcx3qa_predicated:
4003 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4004 break;
4005 default:
4006 llvm_unreachable("Unhandled Intrinsic!");
4007 }
4008
4009 std::vector<Value *> Ops;
4010 for (Value *Op : CI->args()) {
4011 Type *Ty = Op->getType();
4012 if (Ty->getScalarSizeInBits() == 1) {
4013 Value *C1 = Builder.CreateCall(
4014 Intrinsic::getDeclaration(
4015 F->getParent(), Intrinsic::arm_mve_pred_v2i,
4016 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
4017 Op);
4018 Op = Builder.CreateCall(
4019 Intrinsic::getDeclaration(F->getParent(),
4020 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
4021 C1);
4022 }
4023 Ops.push_back(Op);
4024 }
4025
4026 Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
4027 return Builder.CreateCall(Fn, Ops, CI->getName());
4028 }
4029 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4030 }
4031
4032 // These are expected to have the arguments:
4033 // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4034 //
4035 // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4036 //
upgradeAMDGCNIntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)4037 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4038 Function *F, IRBuilder<> &Builder) {
4039 AtomicRMWInst::BinOp RMWOp =
4040 StringSwitch<AtomicRMWInst::BinOp>(Name)
4041 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4042 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4043 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4044 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4045 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
4046
4047 unsigned NumOperands = CI->getNumOperands();
4048 if (NumOperands < 3) // Malformed bitcode.
4049 return nullptr;
4050
4051 Value *Ptr = CI->getArgOperand(0);
4052 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4053 if (!PtrTy) // Malformed.
4054 return nullptr;
4055
4056 Value *Val = CI->getArgOperand(1);
4057 if (Val->getType() != CI->getType()) // Malformed.
4058 return nullptr;
4059
4060 ConstantInt *OrderArg = nullptr;
4061 bool IsVolatile = false;
4062
4063 // These should have 5 arguments (plus the callee). A separate version of the
4064 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4065 if (NumOperands > 3)
4066 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4067
4068 // Ignore scope argument at 3
4069
4070 if (NumOperands > 5) {
4071 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4072 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4073 }
4074
4075 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4076 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4077 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4078 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4079 Order = AtomicOrdering::SequentiallyConsistent;
4080
4081 LLVMContext &Ctx = F->getContext();
4082
4083 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4084 Type *RetTy = CI->getType();
4085 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4086 if (VT->getElementType()->isIntegerTy(16)) {
4087 VectorType *AsBF16 =
4088 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4089 Val = Builder.CreateBitCast(Val, AsBF16);
4090 }
4091 }
4092
4093 // The scope argument never really worked correctly. Use agent as the most
4094 // conservative option which should still always produce the instruction.
4095 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4096 AtomicRMWInst *RMW =
4097 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4098
4099 if (PtrTy->getAddressSpace() != 3) {
4100 RMW->setMetadata("amdgpu.no.fine.grained.memory",
4101 MDNode::get(F->getContext(), {}));
4102 }
4103
4104 if (IsVolatile)
4105 RMW->setVolatile(true);
4106
4107 return Builder.CreateBitCast(RMW, RetTy);
4108 }
4109
4110 /// Helper to unwrap intrinsic call MetadataAsValue operands.
4111 template <typename MDType>
unwrapMAVOp(CallBase * CI,unsigned Op)4112 static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4113 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4114 return dyn_cast<MDType>(MAV->getMetadata());
4115 return nullptr;
4116 }
4117
4118 /// Convert debug intrinsic calls to non-instruction debug records.
4119 /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4120 /// \p CI - The debug intrinsic call.
upgradeDbgIntrinsicToDbgRecord(StringRef Name,CallBase * CI)4121 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4122 DbgRecord *DR = nullptr;
4123 if (Name == "label") {
4124 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4125 } else if (Name == "assign") {
4126 DR = new DbgVariableRecord(
4127 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4128 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4129 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4130 CI->getDebugLoc());
4131 } else if (Name == "declare") {
4132 DR = new DbgVariableRecord(
4133 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4134 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4135 DbgVariableRecord::LocationType::Declare);
4136 } else if (Name == "addr") {
4137 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4138 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4139 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4140 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4141 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4142 CI->getDebugLoc());
4143 } else if (Name == "value") {
4144 // An old version of dbg.value had an extra offset argument.
4145 unsigned VarOp = 1;
4146 unsigned ExprOp = 2;
4147 if (CI->arg_size() == 4) {
4148 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4149 // Nonzero offset dbg.values get dropped without a replacement.
4150 if (!Offset || !Offset->isZeroValue())
4151 return;
4152 VarOp = 2;
4153 ExprOp = 3;
4154 }
4155 DR = new DbgVariableRecord(
4156 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4157 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4158 }
4159 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4160 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4161 }
4162
4163 /// Upgrade a call to an old intrinsic. All argument and return casting must be
4164 /// provided to seamlessly integrate with existing context.
UpgradeIntrinsicCall(CallBase * CI,Function * NewFn)4165 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4166 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4167 // checks the callee's function type matches. It's likely we need to handle
4168 // type changes here.
4169 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4170 if (!F)
4171 return;
4172
4173 LLVMContext &C = CI->getContext();
4174 IRBuilder<> Builder(C);
4175 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4176
4177 if (!NewFn) {
4178 bool FallthroughToDefaultUpgrade = false;
4179 // Get the Function's name.
4180 StringRef Name = F->getName();
4181
4182 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4183 Name = Name.substr(5);
4184
4185 bool IsX86 = Name.consume_front("x86.");
4186 bool IsNVVM = Name.consume_front("nvvm.");
4187 bool IsARM = Name.consume_front("arm.");
4188 bool IsAMDGCN = Name.consume_front("amdgcn.");
4189 bool IsDbg = Name.consume_front("dbg.");
4190 Value *Rep = nullptr;
4191
4192 if (!IsX86 && Name == "stackprotectorcheck") {
4193 Rep = nullptr;
4194 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4195 Value *Arg = CI->getArgOperand(0);
4196 Value *Neg = Builder.CreateNeg(Arg, "neg");
4197 Value *Cmp = Builder.CreateICmpSGE(
4198 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4199 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4200 } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4201 Name.starts_with("atomic.load.add.f64.p"))) {
4202 Value *Ptr = CI->getArgOperand(0);
4203 Value *Val = CI->getArgOperand(1);
4204 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4205 AtomicOrdering::SequentiallyConsistent);
4206 } else if (IsNVVM && Name.consume_front("max.") &&
4207 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4208 Name == "ui" || Name == "ull")) {
4209 Value *Arg0 = CI->getArgOperand(0);
4210 Value *Arg1 = CI->getArgOperand(1);
4211 Value *Cmp = Name.starts_with("u")
4212 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4213 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4214 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4215 } else if (IsNVVM && Name.consume_front("min.") &&
4216 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4217 Name == "ui" || Name == "ull")) {
4218 Value *Arg0 = CI->getArgOperand(0);
4219 Value *Arg1 = CI->getArgOperand(1);
4220 Value *Cmp = Name.starts_with("u")
4221 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4222 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4223 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4224 } else if (IsNVVM && Name == "clz.ll") {
4225 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4226 Value *Arg = CI->getArgOperand(0);
4227 Value *Ctlz = Builder.CreateCall(
4228 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4229 {Arg->getType()}),
4230 {Arg, Builder.getFalse()}, "ctlz");
4231 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4232 } else if (IsNVVM && Name == "popc.ll") {
4233 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4234 // i64.
4235 Value *Arg = CI->getArgOperand(0);
4236 Value *Popc = Builder.CreateCall(
4237 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4238 {Arg->getType()}),
4239 Arg, "ctpop");
4240 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4241 } else if (IsNVVM) {
4242 if (Name == "h2f") {
4243 Rep =
4244 Builder.CreateCall(Intrinsic::getDeclaration(
4245 F->getParent(), Intrinsic::convert_from_fp16,
4246 {Builder.getFloatTy()}),
4247 CI->getArgOperand(0), "h2f");
4248 } else {
4249 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4250 if (IID != Intrinsic::not_intrinsic &&
4251 !F->getReturnType()->getScalarType()->isBFloatTy()) {
4252 rename(F);
4253 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4254 SmallVector<Value *, 2> Args;
4255 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4256 Value *Arg = CI->getArgOperand(I);
4257 Type *OldType = Arg->getType();
4258 Type *NewType = NewFn->getArg(I)->getType();
4259 Args.push_back((OldType->isIntegerTy() &&
4260 NewType->getScalarType()->isBFloatTy())
4261 ? Builder.CreateBitCast(Arg, NewType)
4262 : Arg);
4263 }
4264 Rep = Builder.CreateCall(NewFn, Args);
4265 if (F->getReturnType()->isIntegerTy())
4266 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4267 }
4268 }
4269 } else if (IsX86) {
4270 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4271 } else if (IsARM) {
4272 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4273 } else if (IsAMDGCN) {
4274 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4275 } else if (IsDbg) {
4276 // We might have decided we don't want the new format after all between
4277 // first requesting the upgrade and now; skip the conversion if that is
4278 // the case, and check here to see if the intrinsic needs to be upgraded
4279 // normally.
4280 if (!CI->getModule()->IsNewDbgInfoFormat) {
4281 bool NeedsUpgrade =
4282 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4283 if (!NeedsUpgrade)
4284 return;
4285 FallthroughToDefaultUpgrade = true;
4286 } else {
4287 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4288 }
4289 } else {
4290 llvm_unreachable("Unknown function for CallBase upgrade.");
4291 }
4292
4293 if (!FallthroughToDefaultUpgrade) {
4294 if (Rep)
4295 CI->replaceAllUsesWith(Rep);
4296 CI->eraseFromParent();
4297 return;
4298 }
4299 }
4300
4301 const auto &DefaultCase = [&]() -> void {
4302 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4303 // Handle generic mangling change.
4304 assert(
4305 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4306 "Unknown function for CallBase upgrade and isn't just a name change");
4307 CI->setCalledFunction(NewFn);
4308 return;
4309 }
4310
4311 // This must be an upgrade from a named to a literal struct.
4312 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4313 assert(OldST != NewFn->getReturnType() &&
4314 "Return type must have changed");
4315 assert(OldST->getNumElements() ==
4316 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4317 "Must have same number of elements");
4318
4319 SmallVector<Value *> Args(CI->args());
4320 Value *NewCI = Builder.CreateCall(NewFn, Args);
4321 Value *Res = PoisonValue::get(OldST);
4322 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4323 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4324 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4325 }
4326 CI->replaceAllUsesWith(Res);
4327 CI->eraseFromParent();
4328 return;
4329 }
4330
4331 // We're probably about to produce something invalid. Let the verifier catch
4332 // it instead of dying here.
4333 CI->setCalledOperand(
4334 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4335 return;
4336 };
4337 CallInst *NewCall = nullptr;
4338 switch (NewFn->getIntrinsicID()) {
4339 default: {
4340 DefaultCase();
4341 return;
4342 }
4343 case Intrinsic::arm_neon_vst1:
4344 case Intrinsic::arm_neon_vst2:
4345 case Intrinsic::arm_neon_vst3:
4346 case Intrinsic::arm_neon_vst4:
4347 case Intrinsic::arm_neon_vst2lane:
4348 case Intrinsic::arm_neon_vst3lane:
4349 case Intrinsic::arm_neon_vst4lane: {
4350 SmallVector<Value *, 4> Args(CI->args());
4351 NewCall = Builder.CreateCall(NewFn, Args);
4352 break;
4353 }
4354 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4355 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4356 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4357 LLVMContext &Ctx = F->getParent()->getContext();
4358 SmallVector<Value *, 4> Args(CI->args());
4359 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4360 cast<ConstantInt>(Args[3])->getZExtValue());
4361 NewCall = Builder.CreateCall(NewFn, Args);
4362 break;
4363 }
4364 case Intrinsic::aarch64_sve_ld3_sret:
4365 case Intrinsic::aarch64_sve_ld4_sret:
4366 case Intrinsic::aarch64_sve_ld2_sret: {
4367 StringRef Name = F->getName();
4368 Name = Name.substr(5);
4369 unsigned N = StringSwitch<unsigned>(Name)
4370 .StartsWith("aarch64.sve.ld2", 2)
4371 .StartsWith("aarch64.sve.ld3", 3)
4372 .StartsWith("aarch64.sve.ld4", 4)
4373 .Default(0);
4374 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4375 unsigned MinElts = RetTy->getMinNumElements() / N;
4376 SmallVector<Value *, 2> Args(CI->args());
4377 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4378 Value *Ret = llvm::PoisonValue::get(RetTy);
4379 for (unsigned I = 0; I < N; I++) {
4380 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4381 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4382 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4383 }
4384 NewCall = dyn_cast<CallInst>(Ret);
4385 break;
4386 }
4387
4388 case Intrinsic::coro_end: {
4389 SmallVector<Value *, 3> Args(CI->args());
4390 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4391 NewCall = Builder.CreateCall(NewFn, Args);
4392 break;
4393 }
4394
4395 case Intrinsic::vector_extract: {
4396 StringRef Name = F->getName();
4397 Name = Name.substr(5); // Strip llvm
4398 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4399 DefaultCase();
4400 return;
4401 }
4402 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4403 unsigned MinElts = RetTy->getMinNumElements();
4404 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4405 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4406 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4407 break;
4408 }
4409
4410 case Intrinsic::vector_insert: {
4411 StringRef Name = F->getName();
4412 Name = Name.substr(5);
4413 if (!Name.starts_with("aarch64.sve.tuple")) {
4414 DefaultCase();
4415 return;
4416 }
4417 if (Name.starts_with("aarch64.sve.tuple.set")) {
4418 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4419 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4420 Value *NewIdx =
4421 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4422 NewCall = Builder.CreateCall(
4423 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4424 break;
4425 }
4426 if (Name.starts_with("aarch64.sve.tuple.create")) {
4427 unsigned N = StringSwitch<unsigned>(Name)
4428 .StartsWith("aarch64.sve.tuple.create2", 2)
4429 .StartsWith("aarch64.sve.tuple.create3", 3)
4430 .StartsWith("aarch64.sve.tuple.create4", 4)
4431 .Default(0);
4432 assert(N > 1 && "Create is expected to be between 2-4");
4433 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4434 Value *Ret = llvm::PoisonValue::get(RetTy);
4435 unsigned MinElts = RetTy->getMinNumElements() / N;
4436 for (unsigned I = 0; I < N; I++) {
4437 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4438 Value *V = CI->getArgOperand(I);
4439 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4440 }
4441 NewCall = dyn_cast<CallInst>(Ret);
4442 }
4443 break;
4444 }
4445
4446 case Intrinsic::arm_neon_bfdot:
4447 case Intrinsic::arm_neon_bfmmla:
4448 case Intrinsic::arm_neon_bfmlalb:
4449 case Intrinsic::arm_neon_bfmlalt:
4450 case Intrinsic::aarch64_neon_bfdot:
4451 case Intrinsic::aarch64_neon_bfmmla:
4452 case Intrinsic::aarch64_neon_bfmlalb:
4453 case Intrinsic::aarch64_neon_bfmlalt: {
4454 SmallVector<Value *, 3> Args;
4455 assert(CI->arg_size() == 3 &&
4456 "Mismatch between function args and call args");
4457 size_t OperandWidth =
4458 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4459 assert((OperandWidth == 64 || OperandWidth == 128) &&
4460 "Unexpected operand width");
4461 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4462 auto Iter = CI->args().begin();
4463 Args.push_back(*Iter++);
4464 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4465 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4466 NewCall = Builder.CreateCall(NewFn, Args);
4467 break;
4468 }
4469
4470 case Intrinsic::bitreverse:
4471 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4472 break;
4473
4474 case Intrinsic::ctlz:
4475 case Intrinsic::cttz:
4476 assert(CI->arg_size() == 1 &&
4477 "Mismatch between function args and call args");
4478 NewCall =
4479 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4480 break;
4481
4482 case Intrinsic::objectsize: {
4483 Value *NullIsUnknownSize =
4484 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4485 Value *Dynamic =
4486 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4487 NewCall = Builder.CreateCall(
4488 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4489 break;
4490 }
4491
4492 case Intrinsic::ctpop:
4493 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4494 break;
4495
4496 case Intrinsic::convert_from_fp16:
4497 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4498 break;
4499
4500 case Intrinsic::dbg_value: {
4501 StringRef Name = F->getName();
4502 Name = Name.substr(5); // Strip llvm.
4503 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4504 if (Name.starts_with("dbg.addr")) {
4505 DIExpression *Expr = cast<DIExpression>(
4506 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4507 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4508 NewCall =
4509 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4510 MetadataAsValue::get(C, Expr)});
4511 break;
4512 }
4513
4514 // Upgrade from the old version that had an extra offset argument.
4515 assert(CI->arg_size() == 4);
4516 // Drop nonzero offsets instead of attempting to upgrade them.
4517 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4518 if (Offset->isZeroValue()) {
4519 NewCall = Builder.CreateCall(
4520 NewFn,
4521 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4522 break;
4523 }
4524 CI->eraseFromParent();
4525 return;
4526 }
4527
4528 case Intrinsic::ptr_annotation:
4529 // Upgrade from versions that lacked the annotation attribute argument.
4530 if (CI->arg_size() != 4) {
4531 DefaultCase();
4532 return;
4533 }
4534
4535 // Create a new call with an added null annotation attribute argument.
4536 NewCall =
4537 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4538 CI->getArgOperand(2), CI->getArgOperand(3),
4539 Constant::getNullValue(Builder.getPtrTy())});
4540 NewCall->takeName(CI);
4541 CI->replaceAllUsesWith(NewCall);
4542 CI->eraseFromParent();
4543 return;
4544
4545 case Intrinsic::var_annotation:
4546 // Upgrade from versions that lacked the annotation attribute argument.
4547 if (CI->arg_size() != 4) {
4548 DefaultCase();
4549 return;
4550 }
4551 // Create a new call with an added null annotation attribute argument.
4552 NewCall =
4553 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4554 CI->getArgOperand(2), CI->getArgOperand(3),
4555 Constant::getNullValue(Builder.getPtrTy())});
4556 NewCall->takeName(CI);
4557 CI->replaceAllUsesWith(NewCall);
4558 CI->eraseFromParent();
4559 return;
4560
4561 case Intrinsic::riscv_aes32dsi:
4562 case Intrinsic::riscv_aes32dsmi:
4563 case Intrinsic::riscv_aes32esi:
4564 case Intrinsic::riscv_aes32esmi:
4565 case Intrinsic::riscv_sm4ks:
4566 case Intrinsic::riscv_sm4ed: {
4567 // The last argument to these intrinsics used to be i8 and changed to i32.
4568 // The type overload for sm4ks and sm4ed was removed.
4569 Value *Arg2 = CI->getArgOperand(2);
4570 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4571 return;
4572
4573 Value *Arg0 = CI->getArgOperand(0);
4574 Value *Arg1 = CI->getArgOperand(1);
4575 if (CI->getType()->isIntegerTy(64)) {
4576 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4577 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4578 }
4579
4580 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4581 cast<ConstantInt>(Arg2)->getZExtValue());
4582
4583 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4584 Value *Res = NewCall;
4585 if (Res->getType() != CI->getType())
4586 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4587 NewCall->takeName(CI);
4588 CI->replaceAllUsesWith(Res);
4589 CI->eraseFromParent();
4590 return;
4591 }
4592 case Intrinsic::riscv_sha256sig0:
4593 case Intrinsic::riscv_sha256sig1:
4594 case Intrinsic::riscv_sha256sum0:
4595 case Intrinsic::riscv_sha256sum1:
4596 case Intrinsic::riscv_sm3p0:
4597 case Intrinsic::riscv_sm3p1: {
4598 // The last argument to these intrinsics used to be i8 and changed to i32.
4599 // The type overload for sm4ks and sm4ed was removed.
4600 if (!CI->getType()->isIntegerTy(64))
4601 return;
4602
4603 Value *Arg =
4604 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4605
4606 NewCall = Builder.CreateCall(NewFn, Arg);
4607 Value *Res =
4608 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4609 NewCall->takeName(CI);
4610 CI->replaceAllUsesWith(Res);
4611 CI->eraseFromParent();
4612 return;
4613 }
4614
4615 case Intrinsic::x86_xop_vfrcz_ss:
4616 case Intrinsic::x86_xop_vfrcz_sd:
4617 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4618 break;
4619
4620 case Intrinsic::x86_xop_vpermil2pd:
4621 case Intrinsic::x86_xop_vpermil2ps:
4622 case Intrinsic::x86_xop_vpermil2pd_256:
4623 case Intrinsic::x86_xop_vpermil2ps_256: {
4624 SmallVector<Value *, 4> Args(CI->args());
4625 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4626 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4627 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4628 NewCall = Builder.CreateCall(NewFn, Args);
4629 break;
4630 }
4631
4632 case Intrinsic::x86_sse41_ptestc:
4633 case Intrinsic::x86_sse41_ptestz:
4634 case Intrinsic::x86_sse41_ptestnzc: {
4635 // The arguments for these intrinsics used to be v4f32, and changed
4636 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4637 // So, the only thing required is a bitcast for both arguments.
4638 // First, check the arguments have the old type.
4639 Value *Arg0 = CI->getArgOperand(0);
4640 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4641 return;
4642
4643 // Old intrinsic, add bitcasts
4644 Value *Arg1 = CI->getArgOperand(1);
4645
4646 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4647
4648 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4649 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4650
4651 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4652 break;
4653 }
4654
4655 case Intrinsic::x86_rdtscp: {
4656 // This used to take 1 arguments. If we have no arguments, it is already
4657 // upgraded.
4658 if (CI->getNumOperands() == 0)
4659 return;
4660
4661 NewCall = Builder.CreateCall(NewFn);
4662 // Extract the second result and store it.
4663 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4664 // Cast the pointer to the right type.
4665 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4666 llvm::PointerType::getUnqual(Data->getType()));
4667 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4668 // Replace the original call result with the first result of the new call.
4669 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4670
4671 NewCall->takeName(CI);
4672 CI->replaceAllUsesWith(TSC);
4673 CI->eraseFromParent();
4674 return;
4675 }
4676
4677 case Intrinsic::x86_sse41_insertps:
4678 case Intrinsic::x86_sse41_dppd:
4679 case Intrinsic::x86_sse41_dpps:
4680 case Intrinsic::x86_sse41_mpsadbw:
4681 case Intrinsic::x86_avx_dp_ps_256:
4682 case Intrinsic::x86_avx2_mpsadbw: {
4683 // Need to truncate the last argument from i32 to i8 -- this argument models
4684 // an inherently 8-bit immediate operand to these x86 instructions.
4685 SmallVector<Value *, 4> Args(CI->args());
4686
4687 // Replace the last argument with a trunc.
4688 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4689 NewCall = Builder.CreateCall(NewFn, Args);
4690 break;
4691 }
4692
4693 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4694 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4695 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4696 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4697 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4698 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4699 SmallVector<Value *, 4> Args(CI->args());
4700 unsigned NumElts =
4701 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4702 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4703
4704 NewCall = Builder.CreateCall(NewFn, Args);
4705 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4706
4707 NewCall->takeName(CI);
4708 CI->replaceAllUsesWith(Res);
4709 CI->eraseFromParent();
4710 return;
4711 }
4712
4713 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4714 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4715 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4716 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4717 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4718 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4719 SmallVector<Value *, 4> Args(CI->args());
4720 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4721 if (NewFn->getIntrinsicID() ==
4722 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4723 Args[1] = Builder.CreateBitCast(
4724 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4725
4726 NewCall = Builder.CreateCall(NewFn, Args);
4727 Value *Res = Builder.CreateBitCast(
4728 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4729
4730 NewCall->takeName(CI);
4731 CI->replaceAllUsesWith(Res);
4732 CI->eraseFromParent();
4733 return;
4734 }
4735 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4736 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4737 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4738 SmallVector<Value *, 4> Args(CI->args());
4739 unsigned NumElts =
4740 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4741 Args[1] = Builder.CreateBitCast(
4742 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4743 Args[2] = Builder.CreateBitCast(
4744 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4745
4746 NewCall = Builder.CreateCall(NewFn, Args);
4747 break;
4748 }
4749
4750 case Intrinsic::thread_pointer: {
4751 NewCall = Builder.CreateCall(NewFn, {});
4752 break;
4753 }
4754
4755 case Intrinsic::memcpy:
4756 case Intrinsic::memmove:
4757 case Intrinsic::memset: {
4758 // We have to make sure that the call signature is what we're expecting.
4759 // We only want to change the old signatures by removing the alignment arg:
4760 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4761 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4762 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4763 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4764 // Note: i8*'s in the above can be any pointer type
4765 if (CI->arg_size() != 5) {
4766 DefaultCase();
4767 return;
4768 }
4769 // Remove alignment argument (3), and add alignment attributes to the
4770 // dest/src pointers.
4771 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4772 CI->getArgOperand(2), CI->getArgOperand(4)};
4773 NewCall = Builder.CreateCall(NewFn, Args);
4774 AttributeList OldAttrs = CI->getAttributes();
4775 AttributeList NewAttrs = AttributeList::get(
4776 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4777 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4778 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4779 NewCall->setAttributes(NewAttrs);
4780 auto *MemCI = cast<MemIntrinsic>(NewCall);
4781 // All mem intrinsics support dest alignment.
4782 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4783 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4784 // Memcpy/Memmove also support source alignment.
4785 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4786 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4787 break;
4788 }
4789 }
4790 assert(NewCall && "Should have either set this variable or returned through "
4791 "the default case");
4792 NewCall->takeName(CI);
4793 CI->replaceAllUsesWith(NewCall);
4794 CI->eraseFromParent();
4795 }
4796
UpgradeCallsToIntrinsic(Function * F)4797 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4798 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4799
4800 // Check if this function should be upgraded and get the replacement function
4801 // if there is one.
4802 Function *NewFn;
4803 if (UpgradeIntrinsicFunction(F, NewFn)) {
4804 // Replace all users of the old function with the new function or new
4805 // instructions. This is not a range loop because the call is deleted.
4806 for (User *U : make_early_inc_range(F->users()))
4807 if (CallBase *CB = dyn_cast<CallBase>(U))
4808 UpgradeIntrinsicCall(CB, NewFn);
4809
4810 // Remove old function, no longer used, from the module.
4811 F->eraseFromParent();
4812 }
4813 }
4814
UpgradeTBAANode(MDNode & MD)4815 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4816 const unsigned NumOperands = MD.getNumOperands();
4817 if (NumOperands == 0)
4818 return &MD; // Invalid, punt to a verifier error.
4819
4820 // Check if the tag uses struct-path aware TBAA format.
4821 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4822 return &MD;
4823
4824 auto &Context = MD.getContext();
4825 if (NumOperands == 3) {
4826 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4827 MDNode *ScalarType = MDNode::get(Context, Elts);
4828 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4829 Metadata *Elts2[] = {ScalarType, ScalarType,
4830 ConstantAsMetadata::get(
4831 Constant::getNullValue(Type::getInt64Ty(Context))),
4832 MD.getOperand(2)};
4833 return MDNode::get(Context, Elts2);
4834 }
4835 // Create a MDNode <MD, MD, offset 0>
4836 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4837 Type::getInt64Ty(Context)))};
4838 return MDNode::get(Context, Elts);
4839 }
4840
UpgradeBitCastInst(unsigned Opc,Value * V,Type * DestTy,Instruction * & Temp)4841 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4842 Instruction *&Temp) {
4843 if (Opc != Instruction::BitCast)
4844 return nullptr;
4845
4846 Temp = nullptr;
4847 Type *SrcTy = V->getType();
4848 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4849 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4850 LLVMContext &Context = V->getContext();
4851
4852 // We have no information about target data layout, so we assume that
4853 // the maximum pointer size is 64bit.
4854 Type *MidTy = Type::getInt64Ty(Context);
4855 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4856
4857 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4858 }
4859
4860 return nullptr;
4861 }
4862
UpgradeBitCastExpr(unsigned Opc,Constant * C,Type * DestTy)4863 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4864 if (Opc != Instruction::BitCast)
4865 return nullptr;
4866
4867 Type *SrcTy = C->getType();
4868 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4869 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4870 LLVMContext &Context = C->getContext();
4871
4872 // We have no information about target data layout, so we assume that
4873 // the maximum pointer size is 64bit.
4874 Type *MidTy = Type::getInt64Ty(Context);
4875
4876 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4877 DestTy);
4878 }
4879
4880 return nullptr;
4881 }
4882
4883 /// Check the debug info version number, if it is out-dated, drop the debug
4884 /// info. Return true if module is modified.
UpgradeDebugInfo(Module & M)4885 bool llvm::UpgradeDebugInfo(Module &M) {
4886 if (DisableAutoUpgradeDebugInfo)
4887 return false;
4888
4889 unsigned Version = getDebugMetadataVersionFromModule(M);
4890 if (Version == DEBUG_METADATA_VERSION) {
4891 bool BrokenDebugInfo = false;
4892 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4893 report_fatal_error("Broken module found, compilation aborted!");
4894 if (!BrokenDebugInfo)
4895 // Everything is ok.
4896 return false;
4897 else {
4898 // Diagnose malformed debug info.
4899 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4900 M.getContext().diagnose(Diag);
4901 }
4902 }
4903 bool Modified = StripDebugInfo(M);
4904 if (Modified && Version != DEBUG_METADATA_VERSION) {
4905 // Diagnose a version mismatch.
4906 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4907 M.getContext().diagnose(DiagVersion);
4908 }
4909 return Modified;
4910 }
4911
4912 /// This checks for objc retain release marker which should be upgraded. It
4913 /// returns true if module is modified.
upgradeRetainReleaseMarker(Module & M)4914 static bool upgradeRetainReleaseMarker(Module &M) {
4915 bool Changed = false;
4916 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4917 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4918 if (ModRetainReleaseMarker) {
4919 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4920 if (Op) {
4921 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4922 if (ID) {
4923 SmallVector<StringRef, 4> ValueComp;
4924 ID->getString().split(ValueComp, "#");
4925 if (ValueComp.size() == 2) {
4926 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4927 ID = MDString::get(M.getContext(), NewValue);
4928 }
4929 M.addModuleFlag(Module::Error, MarkerKey, ID);
4930 M.eraseNamedMetadata(ModRetainReleaseMarker);
4931 Changed = true;
4932 }
4933 }
4934 }
4935 return Changed;
4936 }
4937
UpgradeARCRuntime(Module & M)4938 void llvm::UpgradeARCRuntime(Module &M) {
4939 // This lambda converts normal function calls to ARC runtime functions to
4940 // intrinsic calls.
4941 auto UpgradeToIntrinsic = [&](const char *OldFunc,
4942 llvm::Intrinsic::ID IntrinsicFunc) {
4943 Function *Fn = M.getFunction(OldFunc);
4944
4945 if (!Fn)
4946 return;
4947
4948 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4949
4950 for (User *U : make_early_inc_range(Fn->users())) {
4951 CallInst *CI = dyn_cast<CallInst>(U);
4952 if (!CI || CI->getCalledFunction() != Fn)
4953 continue;
4954
4955 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4956 FunctionType *NewFuncTy = NewFn->getFunctionType();
4957 SmallVector<Value *, 2> Args;
4958
4959 // Don't upgrade the intrinsic if it's not valid to bitcast the return
4960 // value to the return type of the old function.
4961 if (NewFuncTy->getReturnType() != CI->getType() &&
4962 !CastInst::castIsValid(Instruction::BitCast, CI,
4963 NewFuncTy->getReturnType()))
4964 continue;
4965
4966 bool InvalidCast = false;
4967
4968 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4969 Value *Arg = CI->getArgOperand(I);
4970
4971 // Bitcast argument to the parameter type of the new function if it's
4972 // not a variadic argument.
4973 if (I < NewFuncTy->getNumParams()) {
4974 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4975 // to the parameter type of the new function.
4976 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4977 NewFuncTy->getParamType(I))) {
4978 InvalidCast = true;
4979 break;
4980 }
4981 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4982 }
4983 Args.push_back(Arg);
4984 }
4985
4986 if (InvalidCast)
4987 continue;
4988
4989 // Create a call instruction that calls the new function.
4990 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4991 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4992 NewCall->takeName(CI);
4993
4994 // Bitcast the return value back to the type of the old call.
4995 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4996
4997 if (!CI->use_empty())
4998 CI->replaceAllUsesWith(NewRetVal);
4999 CI->eraseFromParent();
5000 }
5001
5002 if (Fn->use_empty())
5003 Fn->eraseFromParent();
5004 };
5005
5006 // Unconditionally convert a call to "clang.arc.use" to a call to
5007 // "llvm.objc.clang.arc.use".
5008 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5009
5010 // Upgrade the retain release marker. If there is no need to upgrade
5011 // the marker, that means either the module is already new enough to contain
5012 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5013 if (!upgradeRetainReleaseMarker(M))
5014 return;
5015
5016 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5017 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5018 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5019 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5020 {"objc_autoreleaseReturnValue",
5021 llvm::Intrinsic::objc_autoreleaseReturnValue},
5022 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5023 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5024 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5025 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5026 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5027 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5028 {"objc_release", llvm::Intrinsic::objc_release},
5029 {"objc_retain", llvm::Intrinsic::objc_retain},
5030 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5031 {"objc_retainAutoreleaseReturnValue",
5032 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5033 {"objc_retainAutoreleasedReturnValue",
5034 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5035 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5036 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5037 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5038 {"objc_unsafeClaimAutoreleasedReturnValue",
5039 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5040 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5041 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5042 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5043 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5044 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5045 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5046 {"objc_arc_annotation_topdown_bbstart",
5047 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5048 {"objc_arc_annotation_topdown_bbend",
5049 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5050 {"objc_arc_annotation_bottomup_bbstart",
5051 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5052 {"objc_arc_annotation_bottomup_bbend",
5053 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5054
5055 for (auto &I : RuntimeFuncs)
5056 UpgradeToIntrinsic(I.first, I.second);
5057 }
5058
UpgradeModuleFlags(Module & M)5059 bool llvm::UpgradeModuleFlags(Module &M) {
5060 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5061 if (!ModFlags)
5062 return false;
5063
5064 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5065 bool HasSwiftVersionFlag = false;
5066 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5067 uint32_t SwiftABIVersion;
5068 auto Int8Ty = Type::getInt8Ty(M.getContext());
5069 auto Int32Ty = Type::getInt32Ty(M.getContext());
5070
5071 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5072 MDNode *Op = ModFlags->getOperand(I);
5073 if (Op->getNumOperands() != 3)
5074 continue;
5075 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5076 if (!ID)
5077 continue;
5078 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5079 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5080 Type::getInt32Ty(M.getContext()), B)),
5081 MDString::get(M.getContext(), ID->getString()),
5082 Op->getOperand(2)};
5083 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5084 Changed = true;
5085 };
5086
5087 if (ID->getString() == "Objective-C Image Info Version")
5088 HasObjCFlag = true;
5089 if (ID->getString() == "Objective-C Class Properties")
5090 HasClassProperties = true;
5091 // Upgrade PIC from Error/Max to Min.
5092 if (ID->getString() == "PIC Level") {
5093 if (auto *Behavior =
5094 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5095 uint64_t V = Behavior->getLimitedValue();
5096 if (V == Module::Error || V == Module::Max)
5097 SetBehavior(Module::Min);
5098 }
5099 }
5100 // Upgrade "PIE Level" from Error to Max.
5101 if (ID->getString() == "PIE Level")
5102 if (auto *Behavior =
5103 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5104 if (Behavior->getLimitedValue() == Module::Error)
5105 SetBehavior(Module::Max);
5106
5107 // Upgrade branch protection and return address signing module flags. The
5108 // module flag behavior for these fields were Error and now they are Min.
5109 if (ID->getString() == "branch-target-enforcement" ||
5110 ID->getString().starts_with("sign-return-address")) {
5111 if (auto *Behavior =
5112 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5113 if (Behavior->getLimitedValue() == Module::Error) {
5114 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5115 Metadata *Ops[3] = {
5116 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5117 Op->getOperand(1), Op->getOperand(2)};
5118 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5119 Changed = true;
5120 }
5121 }
5122 }
5123
5124 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
5125 // section name so that llvm-lto will not complain about mismatching
5126 // module flags that is functionally the same.
5127 if (ID->getString() == "Objective-C Image Info Section") {
5128 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5129 SmallVector<StringRef, 4> ValueComp;
5130 Value->getString().split(ValueComp, " ");
5131 if (ValueComp.size() != 1) {
5132 std::string NewValue;
5133 for (auto &S : ValueComp)
5134 NewValue += S.str();
5135 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5136 MDString::get(M.getContext(), NewValue)};
5137 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5138 Changed = true;
5139 }
5140 }
5141 }
5142
5143 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5144 // If the higher bits are set, it adds new module flag for swift info.
5145 if (ID->getString() == "Objective-C Garbage Collection") {
5146 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5147 if (Md) {
5148 assert(Md->getValue() && "Expected non-empty metadata");
5149 auto Type = Md->getValue()->getType();
5150 if (Type == Int8Ty)
5151 continue;
5152 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5153 if ((Val & 0xff) != Val) {
5154 HasSwiftVersionFlag = true;
5155 SwiftABIVersion = (Val & 0xff00) >> 8;
5156 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5157 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5158 }
5159 Metadata *Ops[3] = {
5160 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5161 Op->getOperand(1),
5162 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5163 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5164 Changed = true;
5165 }
5166 }
5167
5168 if (ID->getString() == "amdgpu_code_object_version") {
5169 Metadata *Ops[3] = {
5170 Op->getOperand(0),
5171 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5172 Op->getOperand(2)};
5173 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5174 Changed = true;
5175 }
5176 }
5177
5178 // "Objective-C Class Properties" is recently added for Objective-C. We
5179 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5180 // flag of value 0, so we can correclty downgrade this flag when trying to
5181 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5182 // this module flag.
5183 if (HasObjCFlag && !HasClassProperties) {
5184 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5185 (uint32_t)0);
5186 Changed = true;
5187 }
5188
5189 if (HasSwiftVersionFlag) {
5190 M.addModuleFlag(Module::Error, "Swift ABI Version",
5191 SwiftABIVersion);
5192 M.addModuleFlag(Module::Error, "Swift Major Version",
5193 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5194 M.addModuleFlag(Module::Error, "Swift Minor Version",
5195 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5196 Changed = true;
5197 }
5198
5199 return Changed;
5200 }
5201
UpgradeSectionAttributes(Module & M)5202 void llvm::UpgradeSectionAttributes(Module &M) {
5203 auto TrimSpaces = [](StringRef Section) -> std::string {
5204 SmallVector<StringRef, 5> Components;
5205 Section.split(Components, ',');
5206
5207 SmallString<32> Buffer;
5208 raw_svector_ostream OS(Buffer);
5209
5210 for (auto Component : Components)
5211 OS << ',' << Component.trim();
5212
5213 return std::string(OS.str().substr(1));
5214 };
5215
5216 for (auto &GV : M.globals()) {
5217 if (!GV.hasSection())
5218 continue;
5219
5220 StringRef Section = GV.getSection();
5221
5222 if (!Section.starts_with("__DATA, __objc_catlist"))
5223 continue;
5224
5225 // __DATA, __objc_catlist, regular, no_dead_strip
5226 // __DATA,__objc_catlist,regular,no_dead_strip
5227 GV.setSection(TrimSpaces(Section));
5228 }
5229 }
5230
5231 namespace {
5232 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5233 // callsites within a function that did not also have the strictfp attribute.
5234 // Since 10.0, if strict FP semantics are needed within a function, the
5235 // function must have the strictfp attribute and all calls within the function
5236 // must also have the strictfp attribute. This latter restriction is
5237 // necessary to prevent unwanted libcall simplification when a function is
5238 // being cloned (such as for inlining).
5239 //
5240 // The "dangling" strictfp attribute usage was only used to prevent constant
5241 // folding and other libcall simplification. The nobuiltin attribute on the
5242 // callsite has the same effect.
5243 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5244 StrictFPUpgradeVisitor() = default;
5245
visitCallBase__anonaa1e81db0511::StrictFPUpgradeVisitor5246 void visitCallBase(CallBase &Call) {
5247 if (!Call.isStrictFP())
5248 return;
5249 if (isa<ConstrainedFPIntrinsic>(&Call))
5250 return;
5251 // If we get here, the caller doesn't have the strictfp attribute
5252 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5253 Call.removeFnAttr(Attribute::StrictFP);
5254 Call.addFnAttr(Attribute::NoBuiltin);
5255 }
5256 };
5257 } // namespace
5258
UpgradeFunctionAttributes(Function & F)5259 void llvm::UpgradeFunctionAttributes(Function &F) {
5260 // If a function definition doesn't have the strictfp attribute,
5261 // convert any callsite strictfp attributes to nobuiltin.
5262 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5263 StrictFPUpgradeVisitor SFPV;
5264 SFPV.visit(F);
5265 }
5266
5267 // Remove all incompatibile attributes from function.
5268 F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5269 for (auto &Arg : F.args())
5270 Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5271
5272 // Older versions of LLVM treated an "implicit-section-name" attribute
5273 // similarly to directly setting the section on a Function.
5274 if (Attribute A = F.getFnAttribute("implicit-section-name");
5275 A.isValid() && A.isStringAttribute()) {
5276 F.setSection(A.getValueAsString());
5277 F.removeFnAttr("implicit-section-name");
5278 }
5279 }
5280
isOldLoopArgument(Metadata * MD)5281 static bool isOldLoopArgument(Metadata *MD) {
5282 auto *T = dyn_cast_or_null<MDTuple>(MD);
5283 if (!T)
5284 return false;
5285 if (T->getNumOperands() < 1)
5286 return false;
5287 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5288 if (!S)
5289 return false;
5290 return S->getString().starts_with("llvm.vectorizer.");
5291 }
5292
upgradeLoopTag(LLVMContext & C,StringRef OldTag)5293 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5294 StringRef OldPrefix = "llvm.vectorizer.";
5295 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5296
5297 if (OldTag == "llvm.vectorizer.unroll")
5298 return MDString::get(C, "llvm.loop.interleave.count");
5299
5300 return MDString::get(
5301 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5302 .str());
5303 }
5304
upgradeLoopArgument(Metadata * MD)5305 static Metadata *upgradeLoopArgument(Metadata *MD) {
5306 auto *T = dyn_cast_or_null<MDTuple>(MD);
5307 if (!T)
5308 return MD;
5309 if (T->getNumOperands() < 1)
5310 return MD;
5311 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5312 if (!OldTag)
5313 return MD;
5314 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5315 return MD;
5316
5317 // This has an old tag. Upgrade it.
5318 SmallVector<Metadata *, 8> Ops;
5319 Ops.reserve(T->getNumOperands());
5320 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5321 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5322 Ops.push_back(T->getOperand(I));
5323
5324 return MDTuple::get(T->getContext(), Ops);
5325 }
5326
upgradeInstructionLoopAttachment(MDNode & N)5327 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5328 auto *T = dyn_cast<MDTuple>(&N);
5329 if (!T)
5330 return &N;
5331
5332 if (none_of(T->operands(), isOldLoopArgument))
5333 return &N;
5334
5335 SmallVector<Metadata *, 8> Ops;
5336 Ops.reserve(T->getNumOperands());
5337 for (Metadata *MD : T->operands())
5338 Ops.push_back(upgradeLoopArgument(MD));
5339
5340 return MDTuple::get(T->getContext(), Ops);
5341 }
5342
UpgradeDataLayoutString(StringRef DL,StringRef TT)5343 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5344 Triple T(TT);
5345 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5346 // the address space of globals to 1. This does not apply to SPIRV Logical.
5347 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5348 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5349 !DL.contains("-G") && !DL.starts_with("G")) {
5350 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5351 }
5352
5353 if (T.isLoongArch64() || T.isRISCV64()) {
5354 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5355 auto I = DL.find("-n64-");
5356 if (I != StringRef::npos)
5357 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5358 return DL.str();
5359 }
5360
5361 std::string Res = DL.str();
5362 // AMDGCN data layout upgrades.
5363 if (T.isAMDGCN()) {
5364 // Define address spaces for constants.
5365 if (!DL.contains("-G") && !DL.starts_with("G"))
5366 Res.append(Res.empty() ? "G1" : "-G1");
5367
5368 // Add missing non-integral declarations.
5369 // This goes before adding new address spaces to prevent incoherent string
5370 // values.
5371 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5372 Res.append("-ni:7:8:9");
5373 // Update ni:7 to ni:7:8:9.
5374 if (DL.ends_with("ni:7"))
5375 Res.append(":8:9");
5376 if (DL.ends_with("ni:7:8"))
5377 Res.append(":9");
5378
5379 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5380 // resources) An empty data layout has already been upgraded to G1 by now.
5381 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5382 Res.append("-p7:160:256:256:32");
5383 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5384 Res.append("-p8:128:128");
5385 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5386 Res.append("-p9:192:256:256:32");
5387
5388 return Res;
5389 }
5390
5391 // AArch64 data layout upgrades.
5392 if (T.isAArch64()) {
5393 // Add "-Fn32"
5394 if (!DL.empty() && !DL.contains("-Fn32"))
5395 Res.append("-Fn32");
5396 return Res;
5397 }
5398
5399 if (!T.isX86())
5400 return Res;
5401
5402 // If the datalayout matches the expected format, add pointer size address
5403 // spaces to the datalayout.
5404 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5405 if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5406 SmallVector<StringRef, 4> Groups;
5407 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5408 if (R.match(Res, &Groups))
5409 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5410 }
5411
5412 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5413 // for i128 operations prior to this being reflected in the data layout, and
5414 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5415 // boundaries, so although this is a breaking change, the upgrade is expected
5416 // to fix more IR than it breaks.
5417 // Intel MCU is an exception and uses 4-byte-alignment.
5418 if (!T.isOSIAMCU()) {
5419 std::string I128 = "-i128:128";
5420 if (StringRef Ref = Res; !Ref.contains(I128)) {
5421 SmallVector<StringRef, 4> Groups;
5422 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5423 if (R.match(Res, &Groups))
5424 Res = (Groups[1] + I128 + Groups[3]).str();
5425 }
5426 }
5427
5428 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5429 // Raising the alignment is safe because Clang did not produce f80 values in
5430 // the MSVC environment before this upgrade was added.
5431 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5432 StringRef Ref = Res;
5433 auto I = Ref.find("-f80:32-");
5434 if (I != StringRef::npos)
5435 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5436 }
5437
5438 return Res;
5439 }
5440
UpgradeAttributes(AttrBuilder & B)5441 void llvm::UpgradeAttributes(AttrBuilder &B) {
5442 StringRef FramePointer;
5443 Attribute A = B.getAttribute("no-frame-pointer-elim");
5444 if (A.isValid()) {
5445 // The value can be "true" or "false".
5446 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5447 B.removeAttribute("no-frame-pointer-elim");
5448 }
5449 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5450 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5451 if (FramePointer != "all")
5452 FramePointer = "non-leaf";
5453 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5454 }
5455 if (!FramePointer.empty())
5456 B.addAttribute("frame-pointer", FramePointer);
5457
5458 A = B.getAttribute("null-pointer-is-valid");
5459 if (A.isValid()) {
5460 // The value can be "true" or "false".
5461 bool NullPointerIsValid = A.getValueAsString() == "true";
5462 B.removeAttribute("null-pointer-is-valid");
5463 if (NullPointerIsValid)
5464 B.addAttribute(Attribute::NullPointerIsValid);
5465 }
5466 }
5467
UpgradeOperandBundles(std::vector<OperandBundleDef> & Bundles)5468 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5469 // clang.arc.attachedcall bundles are now required to have an operand.
5470 // If they don't, it's okay to drop them entirely: when there is an operand,
5471 // the "attachedcall" is meaningful and required, but without an operand,
5472 // it's just a marker NOP. Dropping it merely prevents an optimization.
5473 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5474 return OBD.getTag() == "clang.arc.attachedcall" &&
5475 OBD.inputs().empty();
5476 });
5477 }
5478