1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/BinaryFormat/Dwarf.h"
21 #include "llvm/IR/AttributeMask.h"
22 #include "llvm/IR/Attributes.h"
23 #include "llvm/IR/CallingConv.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/DebugInfo.h"
26 #include "llvm/IR/DebugInfoMetadata.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstVisitor.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Intrinsics.h"
34 #include "llvm/IR/IntrinsicsAArch64.h"
35 #include "llvm/IR/IntrinsicsARM.h"
36 #include "llvm/IR/IntrinsicsNVPTX.h"
37 #include "llvm/IR/IntrinsicsRISCV.h"
38 #include "llvm/IR/IntrinsicsWebAssembly.h"
39 #include "llvm/IR/IntrinsicsX86.h"
40 #include "llvm/IR/LLVMContext.h"
41 #include "llvm/IR/MDBuilder.h"
42 #include "llvm/IR/Metadata.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/IR/Verifier.h"
46 #include "llvm/Support/AMDGPUAddrSpace.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/NVPTXAddrSpace.h"
50 #include "llvm/Support/Regex.h"
51 #include "llvm/TargetParser/Triple.h"
52 #include <cstdint>
53 #include <cstring>
54 #include <numeric>
55
56 using namespace llvm;
57
// Command-line escape hatch: lets users turn off the autoupgrade of debug
// info entirely (see the uses of this flag later in this file).
static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
61
rename(GlobalValue * GV)62 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
63
64 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
65 // changed their type from v4f32 to v2i64.
upgradePTESTIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)66 static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
67 Function *&NewFn) {
68 // Check whether this is an old version of the function, which received
69 // v4f32 arguments.
70 Type *Arg0Type = F->getFunctionType()->getParamType(0);
71 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
72 return false;
73
74 // Yes, it's old, replace it with new version.
75 rename(F);
76 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
77 return true;
78 }
79
80 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
81 // arguments have changed their type from i32 to i8.
upgradeX86IntrinsicsWith8BitMask(Function * F,Intrinsic::ID IID,Function * & NewFn)82 static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
83 Function *&NewFn) {
84 // Check that the last argument is an i32.
85 Type *LastArgType = F->getFunctionType()->getParamType(
86 F->getFunctionType()->getNumParams() - 1);
87 if (!LastArgType->isIntegerTy(32))
88 return false;
89
90 // Move this function aside and map down.
91 rename(F);
92 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
93 return true;
94 }
95
96 // Upgrade the declaration of fp compare intrinsics that change return type
97 // from scalar to vXi1 mask.
upgradeX86MaskedFPCompare(Function * F,Intrinsic::ID IID,Function * & NewFn)98 static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
99 Function *&NewFn) {
100 // Check if the return type is a vector.
101 if (F->getReturnType()->isVectorTy())
102 return false;
103
104 rename(F);
105 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
106 return true;
107 }
108
upgradeX86BF16Intrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)109 static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
110 Function *&NewFn) {
111 if (F->getReturnType()->getScalarType()->isBFloatTy())
112 return false;
113
114 rename(F);
115 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
116 return true;
117 }
118
upgradeX86BF16DPIntrinsic(Function * F,Intrinsic::ID IID,Function * & NewFn)119 static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
120 Function *&NewFn) {
121 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
122 return false;
123
124 rename(F);
125 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
126 return true;
127 }
128
// Return true if \p Name (already stripped of the leading "x86.") identifies
// an old X86 intrinsic whose *call sites* must be rewritten; the caller
// (upgradeX86IntrinsicFunction) leaves NewFn null for these.
// NOTE: the consume_front() calls below progressively strip prefixes, so the
// ordering of the sections is significant.
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||       // Added in 3.7
            Name == "cvt.ps2.pd.256" ||          // Added in 3.9
            Name == "cvtdq2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.ps.256" ||           // Added in 7.0
            Name.starts_with("movnt.") ||        // Added in 3.2
            Name.starts_with("sqrt.p") ||        // Added in 7.0
            Name.starts_with("storeu.") ||       // Added in 3.9
            Name.starts_with("vbroadcast.s") ||  // Added in 3.5
            Name.starts_with("vbroadcastf128") ||// Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") ||  // Added in 3.7
            Name.starts_with("vperm2f128.") ||   // Added in 6.0
            Name.starts_with("vpermil."));       // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||           // Added in 5.0
            Name.starts_with("pabs.") ||    // Added in 6.0
            Name.starts_with("padds.") ||   // Added in 8.0
            Name.starts_with("paddus.") ||  // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" ||            // Added in 3.7
            Name.starts_with("pbroadcast") dall|| // Added in 3.8
            Name.starts_with("pcmpeq.") ||  // Added in 3.1
            Name.starts_with("pcmpgt.") ||  // Added in 3.1
            Name.starts_with("pmax") ||     // Added in 3.9
            Name.starts_with("pmin") ||     // Added in 3.9
            Name.starts_with("pmovsx") ||   // Added in 3.9
            Name.starts_with("pmovzx") ||   // Added in 3.9
            Name == "pmul.dq" ||            // Added in 7.0
            Name == "pmulu.dq" ||           // Added in 7.0
            Name.starts_with("psll.dq") ||  // Added in 3.7
            Name.starts_with("psrl.dq") ||  // Added in 3.7
            Name.starts_with("psubs.") ||   // Added in 8.0
            Name.starts_with("psubus.") ||  // Added in 8.0
            Name.starts_with("vbroadcast") ||// Added in 3.8
            Name == "vbroadcasti128" ||     // Added in 3.7
            Name == "vextracti128" ||       // Added in 3.7
            Name == "vinserti128" ||        // Added in 3.7
            Name == "vperm2i128");          // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||        // Added in 3.9
              Name.starts_with("andn.") ||       // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||       // Added in 5.0
              Name.starts_with("cmp.d") ||       // Added in 5.0
              Name.starts_with("cmp.q") ||       // Added in 5.0
              Name.starts_with("cmp.w") ||       // Added in 5.0
              Name.starts_with("compress.b") ||  // Added in 9.0
              Name.starts_with("compress.d") ||  // Added in 9.0
              Name.starts_with("compress.p") ||  // Added in 9.0
              Name.starts_with("compress.q") ||  // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||  // Added in 9.0
              Name.starts_with("conflict.") ||   // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||   // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||   // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||          // Added in 7.0
              Name == "cvtpd2ps.256" ||          // Added in 7.0
              Name == "cvtps2pd.128" ||          // Added in 7.0
              Name == "cvtps2pd.256" ||          // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||   // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||          // Added in 9.0
              Name == "cvtqq2ps.512" ||          // Added in 9.0
              Name == "cvttpd2dq.256" ||         // Added in 7.0
              Name == "cvttps2dq.128" ||         // Added in 7.0
              Name == "cvttps2dq.256" ||         // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||  // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||  // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||  // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||         // Added in 9.0
              Name == "cvtuqq2ps.512" ||         // Added in 9.0
              Name.starts_with("dbpsadbw.") ||   // Added in 7.0
              Name.starts_with("div.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") ||    // Added in 9.0
              Name.starts_with("expand.d") ||    // Added in 9.0
              Name.starts_with("expand.load.") ||// Added in 7.0
              Name.starts_with("expand.p") ||    // Added in 9.0
              Name.starts_with("expand.q") ||    // Added in 9.0
              Name.starts_with("expand.w") ||    // Added in 9.0
              Name.starts_with("fpclass.p") ||   // Added in 7.0
              Name.starts_with("insert") ||      // Added in 4.0
              Name.starts_with("load.") ||       // Added in 3.9
              Name.starts_with("loadu.") ||      // Added in 3.9
              Name.starts_with("lzcnt.") ||      // Added in 5.0
              Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||     // Added in 3.9
              Name.starts_with("move.s") ||      // Added in 4.0
              Name.starts_with("movshdup") ||    // Added in 3.9
              Name.starts_with("movsldup") ||    // Added in 3.9
              Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||         // Added in 3.9
              Name.starts_with("pabs.") ||       // Added in 6.0
              Name.starts_with("packssdw.") ||   // Added in 5.0
              Name.starts_with("packsswb.") ||   // Added in 5.0
              Name.starts_with("packusdw.") ||   // Added in 5.0
              Name.starts_with("packuswb.") ||   // Added in 5.0
              Name.starts_with("padd.") ||       // Added in 4.0
              Name.starts_with("padds.") ||      // Added in 8.0
              Name.starts_with("paddus.") ||     // Added in 8.0
              Name.starts_with("palignr.") ||    // Added in 3.9
              Name.starts_with("pand.") ||       // Added in 3.9
              Name.starts_with("pandn.") ||      // Added in 3.9
              Name.starts_with("pavg") ||        // Added in 6.0
              Name.starts_with("pbroadcast") ||  // Added in 6.0
              Name.starts_with("pcmpeq.") ||     // Added in 3.9
              Name.starts_with("pcmpgt.") ||     // Added in 3.9
              Name.starts_with("perm.df.") ||    // Added in 3.9
              Name.starts_with("perm.di.") ||    // Added in 3.9
              Name.starts_with("permvar.") ||    // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") ||   // Added in 7.0
              Name.starts_with("pmax") ||        // Added in 4.0
              Name.starts_with("pmin") ||        // Added in 4.0
              Name == "pmov.qd.256" ||           // Added in 9.0
              Name == "pmov.qd.512" ||           // Added in 9.0
              Name == "pmov.wb.256" ||           // Added in 9.0
              Name == "pmov.wb.512" ||           // Added in 9.0
              Name.starts_with("pmovsx") ||      // Added in 4.0
              Name.starts_with("pmovzx") ||      // Added in 4.0
              Name.starts_with("pmul.dq.") ||    // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") ||    // Added in 7.0
              Name.starts_with("pmulhu.w.") ||   // Added in 7.0
              Name.starts_with("pmull.") ||      // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||   // Added in 4.0
              Name.starts_with("por.") ||        // Added in 3.9
              Name.starts_with("prol.") ||       // Added in 8.0
              Name.starts_with("prolv.") ||      // Added in 8.0
              Name.starts_with("pror.") ||       // Added in 8.0
              Name.starts_with("prorv.") ||      // Added in 8.0
              Name.starts_with("pshuf.b.") ||    // Added in 4.0
              Name.starts_with("pshuf.d.") ||    // Added in 3.9
              Name.starts_with("pshufh.w.") ||   // Added in 3.9
              Name.starts_with("pshufl.w.") ||   // Added in 3.9
              Name.starts_with("psll.d") ||      // Added in 4.0
              Name.starts_with("psll.q") ||      // Added in 4.0
              Name.starts_with("psll.w") ||      // Added in 4.0
              Name.starts_with("pslli") ||       // Added in 4.0
              Name.starts_with("psllv") ||       // Added in 4.0
              Name.starts_with("psra.d") ||      // Added in 4.0
              Name.starts_with("psra.q") ||      // Added in 4.0
              Name.starts_with("psra.w") ||      // Added in 4.0
              Name.starts_with("psrai") ||       // Added in 4.0
              Name.starts_with("psrav") ||       // Added in 4.0
              Name.starts_with("psrl.d") ||      // Added in 4.0
              Name.starts_with("psrl.q") ||      // Added in 4.0
              Name.starts_with("psrl.w") ||      // Added in 4.0
              Name.starts_with("psrli") ||       // Added in 4.0
              Name.starts_with("psrlv") ||       // Added in 4.0
              Name.starts_with("psub.") ||       // Added in 4.0
              Name.starts_with("psubs.") ||      // Added in 8.0
              Name.starts_with("psubus.") ||     // Added in 8.0
              Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("punpckh") ||     // Added in 3.9
              Name.starts_with("punpckl") ||     // Added in 3.9
              Name.starts_with("pxor.") ||       // Added in 3.9
              Name.starts_with("shuf.f") ||      // Added in 6.0
              Name.starts_with("shuf.i") ||      // Added in 6.0
              Name.starts_with("shuf.p") ||      // Added in 4.0
              Name.starts_with("sqrt.p") ||      // Added in 7.0
              Name.starts_with("store.b.") ||    // Added in 3.9
              Name.starts_with("store.d.") ||    // Added in 3.9
              Name.starts_with("store.p") ||     // Added in 3.9
              Name.starts_with("store.q.") ||    // Added in 3.9
              Name.starts_with("store.w.") ||    // Added in 3.9
              Name == "store.ss" ||              // Added in 7.0
              Name.starts_with("storeu.") ||     // Added in 3.9
              Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||       // Added in 5.0
              Name.starts_with("unpckh.") ||     // Added in 3.9
              Name.starts_with("unpckl.") ||     // Added in 3.9
              Name.starts_with("valign.") ||     // Added in 4.0
              Name == "vcvtph2ps.128" ||         // Added in 11.0
              Name == "vcvtph2ps.256" ||         // Added in 11.0
              Name.starts_with("vextract") ||    // Added in 4.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vfnmadd.") ||    // Added in 7.0
              Name.starts_with("vfnmsub.") ||    // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") ||   // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshld.") ||     // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrd.") ||     // Added in 7.0
              Name.starts_with("vpshrdv.") ||    // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor."));         // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||              // Added in 5.0
            Name == "pmul.dq.512" ||           // Added in 7.0
            Name == "pmulu.dq.512" ||          // Added in 7.0
            Name.starts_with("broadcastm") ||  // Added in 6.0
            Name.starts_with("cmp.p") ||       // Added in 12.0
            Name.starts_with("cvtb2mask.") ||  // Added in 7.0
            Name.starts_with("cvtd2mask.") ||  // Added in 7.0
            Name.starts_with("cvtmask2") ||    // Added in 5.0
            Name.starts_with("cvtq2mask.") ||  // Added in 7.0
            Name == "cvtusi2sd" ||             // Added in 7.0
            Name.starts_with("cvtw2mask.") ||  // Added in 7.0
            Name == "kand.w" ||                // Added in 7.0
            Name == "kandn.w" ||               // Added in 7.0
            Name == "knot.w" ||                // Added in 7.0
            Name == "kor.w" ||                 // Added in 7.0
            Name == "kortestc.w" ||            // Added in 7.0
            Name == "kortestz.w" ||            // Added in 7.0
            Name.starts_with("kunpck") ||      // added in 6.0
            Name == "kxnor.w" ||               // Added in 7.0
            Name == "kxor.w" ||                // Added in 7.0
            Name.starts_with("padds.") ||      // Added in 8.0
            Name.starts_with("pbroadcast") ||  // Added in 3.9
            Name.starts_with("prol") ||        // Added in 8.0
            Name.starts_with("pror") ||        // Added in 8.0
            Name.starts_with("psll.dq") ||     // Added in 3.9
            Name.starts_with("psrl.dq") ||     // Added in 3.9
            Name.starts_with("psubs.") ||      // Added in 8.0
            Name.starts_with("ptestm") ||      // Added in 6.0
            Name.starts_with("ptestnm") ||     // Added in 6.0
            Name.starts_with("storent.") ||    // Added in 3.9
            Name.starts_with("vbroadcast.s") ||// Added in 7.0
            Name.starts_with("vpshld.") ||     // Added in 8.0
            Name.starts_with("vpshrd."));      // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||         // Added in 3.8
            Name == "vpcmov.256" ||     // Added in 5.0
            Name.starts_with("vpcom") ||// Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  // Remaining 'x86.*' names with no family prefix.
  return (Name == "addcarry.u32" ||          // Added in 8.0
          Name == "addcarry.u64" ||          // Added in 8.0
          Name == "addcarryx.u32" ||         // Added in 8.0
          Name == "addcarryx.u64" ||         // Added in 8.0
          Name == "subborrow.u32" ||         // Added in 8.0
          Name == "subborrow.u64" ||         // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
495
// Upgrade old X86 intrinsic declarations. Returns true if \p Name (without
// the "llvm." prefix) needs upgrading. On a true return, NewFn is either the
// replacement declaration, or nullptr when the call sites themselves must be
// rewritten later (the shouldUpgradeX86Intrinsic set).
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  // Intrinsics whose calls are rewritten in place: signal with a null NewFn.
  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          // Remaining case (presumably 32-bit elements in a 256-bit vector).
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    // Renamed to the target-independent eh.recoverfp intrinsic.
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}
634
635 // Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
636 // IsArm: 'arm.*', !IsArm: 'aarch64.*'.
upgradeArmOrAarch64IntrinsicFunction(bool IsArm,Function * F,StringRef Name,Function * & NewFn)637 static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
638 StringRef Name,
639 Function *&NewFn) {
640 if (Name.starts_with("rbit")) {
641 // '(arm|aarch64).rbit'.
642 NewFn = Intrinsic::getOrInsertDeclaration(
643 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
644 return true;
645 }
646
647 if (Name == "thread.pointer") {
648 // '(arm|aarch64).thread.pointer'.
649 NewFn = Intrinsic::getOrInsertDeclaration(
650 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
651 return true;
652 }
653
654 bool Neon = Name.consume_front("neon.");
655 if (Neon) {
656 // '(arm|aarch64).neon.*'.
657 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
658 // v16i8 respectively.
659 if (Name.consume_front("bfdot.")) {
660 // (arm|aarch64).neon.bfdot.*'.
661 Intrinsic::ID ID =
662 StringSwitch<Intrinsic::ID>(Name)
663 .Cases("v2f32.v8i8", "v4f32.v16i8",
664 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
665 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
666 .Default(Intrinsic::not_intrinsic);
667 if (ID != Intrinsic::not_intrinsic) {
668 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
669 assert((OperandWidth == 64 || OperandWidth == 128) &&
670 "Unexpected operand width");
671 LLVMContext &Ctx = F->getParent()->getContext();
672 std::array<Type *, 2> Tys{
673 {F->getReturnType(),
674 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
675 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
676 return true;
677 }
678 return false; // No other '(arm|aarch64).neon.bfdot.*'.
679 }
680
681 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
682 // anymore and accept v8bf16 instead of v16i8.
683 if (Name.consume_front("bfm")) {
684 // (arm|aarch64).neon.bfm*'.
685 if (Name.consume_back(".v4f32.v16i8")) {
686 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
687 Intrinsic::ID ID =
688 StringSwitch<Intrinsic::ID>(Name)
689 .Case("mla",
690 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
691 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
692 .Case("lalb",
693 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
694 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
695 .Case("lalt",
696 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
697 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
698 .Default(Intrinsic::not_intrinsic);
699 if (ID != Intrinsic::not_intrinsic) {
700 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
701 return true;
702 }
703 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
704 }
705 return false; // No other '(arm|aarch64).neon.bfm*.
706 }
707 // Continue on to Aarch64 Neon or Arm Neon.
708 }
709 // Continue on to Arm or Aarch64.
710
711 if (IsArm) {
712 // 'arm.*'.
713 if (Neon) {
714 // 'arm.neon.*'.
715 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
716 .StartsWith("vclz.", Intrinsic::ctlz)
717 .StartsWith("vcnt.", Intrinsic::ctpop)
718 .StartsWith("vqadds.", Intrinsic::sadd_sat)
719 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
720 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
721 .StartsWith("vqsubu.", Intrinsic::usub_sat)
722 .StartsWith("vrinta.", Intrinsic::round)
723 .StartsWith("vrintn.", Intrinsic::roundeven)
724 .StartsWith("vrintm.", Intrinsic::floor)
725 .StartsWith("vrintp.", Intrinsic::ceil)
726 .StartsWith("vrintx.", Intrinsic::rint)
727 .StartsWith("vrintz.", Intrinsic::trunc)
728 .Default(Intrinsic::not_intrinsic);
729 if (ID != Intrinsic::not_intrinsic) {
730 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
731 F->arg_begin()->getType());
732 return true;
733 }
734
735 if (Name.consume_front("vst")) {
736 // 'arm.neon.vst*'.
737 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
738 SmallVector<StringRef, 2> Groups;
739 if (vstRegex.match(Name, &Groups)) {
740 static const Intrinsic::ID StoreInts[] = {
741 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
742 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
743
744 static const Intrinsic::ID StoreLaneInts[] = {
745 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
746 Intrinsic::arm_neon_vst4lane};
747
748 auto fArgs = F->getFunctionType()->params();
749 Type *Tys[] = {fArgs[0], fArgs[1]};
750 if (Groups[1].size() == 1)
751 NewFn = Intrinsic::getOrInsertDeclaration(
752 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
753 else
754 NewFn = Intrinsic::getOrInsertDeclaration(
755 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
756 return true;
757 }
758 return false; // No other 'arm.neon.vst*'.
759 }
760
761 return false; // No other 'arm.neon.*'.
762 }
763
764 if (Name.consume_front("mve.")) {
765 // 'arm.mve.*'.
766 if (Name == "vctp64") {
767 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
768 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
769 // the function and deal with it below in UpgradeIntrinsicCall.
770 rename(F);
771 return true;
772 }
773 return false; // Not 'arm.mve.vctp64'.
774 }
775
776 if (Name.starts_with("vrintn.v")) {
777 NewFn = Intrinsic::getOrInsertDeclaration(
778 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
779 return true;
780 }
781
782 // These too are changed to accept a v2i1 instead of the old v4i1.
783 if (Name.consume_back(".v4i1")) {
784 // 'arm.mve.*.v4i1'.
785 if (Name.consume_back(".predicated.v2i64.v4i32"))
786 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
787 return Name == "mull.int" || Name == "vqdmull";
788
789 if (Name.consume_back(".v2i64")) {
790 // 'arm.mve.*.v2i64.v4i1'
791 bool IsGather = Name.consume_front("vldr.gather.");
792 if (IsGather || Name.consume_front("vstr.scatter.")) {
793 if (Name.consume_front("base.")) {
794 // Optional 'wb.' prefix.
795 Name.consume_front("wb.");
796 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
797 // predicated.v2i64.v2i64.v4i1'.
798 return Name == "predicated.v2i64";
799 }
800
801 if (Name.consume_front("offset.predicated."))
802 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
803 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
804
805 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
806 return false;
807 }
808
809 return false; // No other 'arm.mve.*.v2i64.v4i1'.
810 }
811 return false; // No other 'arm.mve.*.v4i1'.
812 }
813 return false; // No other 'arm.mve.*'.
814 }
815
816 if (Name.consume_front("cde.vcx")) {
817 // 'arm.cde.vcx*'.
818 if (Name.consume_back(".predicated.v2i64.v4i1"))
819 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
820 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
821 Name == "3q" || Name == "3qa";
822
823 return false; // No other 'arm.cde.vcx*'.
824 }
825 } else {
826 // 'aarch64.*'.
827 if (Neon) {
828 // 'aarch64.neon.*'.
829 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
830 .StartsWith("frintn", Intrinsic::roundeven)
831 .StartsWith("rbit", Intrinsic::bitreverse)
832 .Default(Intrinsic::not_intrinsic);
833 if (ID != Intrinsic::not_intrinsic) {
834 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
835 F->arg_begin()->getType());
836 return true;
837 }
838
839 if (Name.starts_with("addp")) {
840 // 'aarch64.neon.addp*'.
841 if (F->arg_size() != 2)
842 return false; // Invalid IR.
843 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
844 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
845 NewFn = Intrinsic::getOrInsertDeclaration(
846 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
847 return true;
848 }
849 }
850
851 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
852 if (Name.starts_with("bfcvt")) {
853 NewFn = nullptr;
854 return true;
855 }
856
857 return false; // No other 'aarch64.neon.*'.
858 }
859 if (Name.consume_front("sve.")) {
860 // 'aarch64.sve.*'.
861 if (Name.consume_front("bf")) {
862 if (Name.consume_back(".lane")) {
863 // 'aarch64.sve.bf*.lane'.
864 Intrinsic::ID ID =
865 StringSwitch<Intrinsic::ID>(Name)
866 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
867 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
868 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
869 .Default(Intrinsic::not_intrinsic);
870 if (ID != Intrinsic::not_intrinsic) {
871 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
872 return true;
873 }
874 return false; // No other 'aarch64.sve.bf*.lane'.
875 }
876 return false; // No other 'aarch64.sve.bf*'.
877 }
878
879 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
880 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
881 NewFn = nullptr;
882 return true;
883 }
884
885 if (Name.consume_front("addqv")) {
886 // 'aarch64.sve.addqv'.
887 if (!F->getReturnType()->isFPOrFPVectorTy())
888 return false;
889
890 auto Args = F->getFunctionType()->params();
891 Type *Tys[] = {F->getReturnType(), Args[1]};
892 NewFn = Intrinsic::getOrInsertDeclaration(
893 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
894 return true;
895 }
896
897 if (Name.consume_front("ld")) {
898 // 'aarch64.sve.ld*'.
899 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
900 if (LdRegex.match(Name)) {
901 Type *ScalarTy =
902 cast<VectorType>(F->getReturnType())->getElementType();
903 ElementCount EC =
904 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
905 Type *Ty = VectorType::get(ScalarTy, EC);
906 static const Intrinsic::ID LoadIDs[] = {
907 Intrinsic::aarch64_sve_ld2_sret,
908 Intrinsic::aarch64_sve_ld3_sret,
909 Intrinsic::aarch64_sve_ld4_sret,
910 };
911 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
912 LoadIDs[Name[0] - '2'], Ty);
913 return true;
914 }
915 return false; // No other 'aarch64.sve.ld*'.
916 }
917
918 if (Name.consume_front("tuple.")) {
919 // 'aarch64.sve.tuple.*'.
920 if (Name.starts_with("get")) {
921 // 'aarch64.sve.tuple.get*'.
922 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
923 NewFn = Intrinsic::getOrInsertDeclaration(
924 F->getParent(), Intrinsic::vector_extract, Tys);
925 return true;
926 }
927
928 if (Name.starts_with("set")) {
929 // 'aarch64.sve.tuple.set*'.
930 auto Args = F->getFunctionType()->params();
931 Type *Tys[] = {Args[0], Args[2], Args[1]};
932 NewFn = Intrinsic::getOrInsertDeclaration(
933 F->getParent(), Intrinsic::vector_insert, Tys);
934 return true;
935 }
936
937 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
938 if (CreateTupleRegex.match(Name)) {
939 // 'aarch64.sve.tuple.create*'.
940 auto Args = F->getFunctionType()->params();
941 Type *Tys[] = {F->getReturnType(), Args[1]};
942 NewFn = Intrinsic::getOrInsertDeclaration(
943 F->getParent(), Intrinsic::vector_insert, Tys);
944 return true;
945 }
946 return false; // No other 'aarch64.sve.tuple.*'.
947 }
948 return false; // No other 'aarch64.sve.*'.
949 }
950 }
951 return false; // No other 'arm.*', 'aarch64.*'.
952 }
953
shouldUpgradeNVPTXTMAG2SIntrinsics(Function * F,StringRef Name)954 static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
955 StringRef Name) {
956 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
957 Intrinsic::ID ID =
958 StringSwitch<Intrinsic::ID>(Name)
959 .Case("im2col.3d",
960 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
961 .Case("im2col.4d",
962 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
963 .Case("im2col.5d",
964 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
965 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
966 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
967 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
968 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
969 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
970 .Default(Intrinsic::not_intrinsic);
971
972 if (ID == Intrinsic::not_intrinsic)
973 return ID;
974
975 // These intrinsics may need upgrade for two reasons:
976 // (1) When the address-space of the first argument is shared[AS=3]
977 // (and we upgrade it to use shared_cluster address-space[AS=7])
978 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
979 NVPTXAS::ADDRESS_SPACE_SHARED)
980 return ID;
981
982 // (2) When there are only two boolean flag arguments at the end:
983 //
984 // The last three parameters of the older version of these
985 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
986 //
987 // The newer version reads as:
988 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
989 //
990 // So, when the type of the [N-3]rd argument is "not i1", then
991 // it is the older version and we need to upgrade.
992 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
993 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
994 if (!ArgType->isIntegerTy(1))
995 return ID;
996 }
997
998 return Intrinsic::not_intrinsic;
999 }
1000
shouldUpgradeNVPTXSharedClusterIntrinsic(Function * F,StringRef Name)1001 static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
1002 StringRef Name) {
1003 if (Name.consume_front("mapa.shared.cluster"))
1004 if (F->getReturnType()->getPointerAddressSpace() ==
1005 NVPTXAS::ADDRESS_SPACE_SHARED)
1006 return Intrinsic::nvvm_mapa_shared_cluster;
1007
1008 if (Name.consume_front("cp.async.bulk.")) {
1009 Intrinsic::ID ID =
1010 StringSwitch<Intrinsic::ID>(Name)
1011 .Case("global.to.shared.cluster",
1012 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1013 .Case("shared.cta.to.cluster",
1014 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1015 .Default(Intrinsic::not_intrinsic);
1016
1017 if (ID != Intrinsic::not_intrinsic)
1018 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1019 NVPTXAS::ADDRESS_SPACE_SHARED)
1020 return ID;
1021 }
1022
1023 return Intrinsic::not_intrinsic;
1024 }
1025
// Map an NVVM bf16 math intrinsic name (with the "llvm.nvvm." prefix already
// stripped) to the modern bfloat-typed intrinsic ID, or return
// Intrinsic::not_intrinsic if the name is not one of the upgradable bf16
// intrinsics.  The caller additionally checks the declared return type before
// deciding to upgrade.
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  // nvvm.fma.rn.{ftz.}{relu.|sat.}bf16{x2}
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  // nvvm.fmax.{ftz.}{nan.}{xorsign.abs.}bf16{x2}
  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  // nvvm.fmin.{ftz.}{nan.}{xorsign.abs.}bf16{x2} (mirrors the fmax table)
  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  // nvvm.neg.bf16{x2}
  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
1099
consumeNVVMPtrAddrSpace(StringRef & Name)1100 static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
1101 return Name.consume_front("local") || Name.consume_front("shared") ||
1102 Name.consume_front("global") || Name.consume_front("constant") ||
1103 Name.consume_front("param");
1104 }
1105
// Determine whether \p F is a deprecated intrinsic declaration ("llvm.*")
// that must be auto-upgraded.  Returns true when an upgrade is needed; in
// that case \p NewFn is either the replacement declaration, or null when
// there is no direct replacement and each call site is instead rewritten
// later by UpgradeIntrinsicCall.
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  // Dispatch on the first character of the stripped name to keep the string
  // comparisons below cheap.
  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    // 'ctlz.*'/'cttz.*' lost their single-argument form; the modern
    // intrinsics take an extra is_zero_poison flag.
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    // Two-argument 'coro.end' predates the current signature.
    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      // These were promoted out of the experimental namespace.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        // Rebuild the overload type list for the replacement declaration;
        // which types participate in mangling differs per intrinsic.
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        // The "v2" reductions (fadd/fmul) carry an initial scalar operand,
        // so the vector overload type lives at index 1 instead of 0.
        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      // 'flt.rounds' was renamed to 'get.rounding'.
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embedding the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      // Five arguments means the old (dest, src, len, align, volatile) form.
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Default(false);

      if (Expand) {
        // No replacement declaration; the call is expanded inline later.
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      // Older 'objectsize' had 2 or 3 arguments; the modern form has 4.
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    // The 4-argument 'ptr.annotation' predates the current 5-argument form.
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        // Only upgrade if the third operand is not already i32.
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        // Upgrade when the declaration still uses the old i64-based or
        // non-i32-selector signature.
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        // These used to return i64 on RV64; the modern forms return i32.
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      // Removed intrinsic; calls are dropped during call upgrade.
      NewFn = nullptr;
      return true;
    }
    break;

  case 't':
    if (Name == "thread.pointer") {
      // 'thread.pointer' became overloaded on its return pointer type.
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
      return true;
    }
    break;

  case 'v': {
    // The 4-argument 'var.annotation' predates the current 5-argument form.
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      // Relaxed-SIMD intrinsics were renamed to their 'relaxed' spellings.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}
1626
UpgradeIntrinsicFunction(Function * F,Function * & NewFn,bool CanUpgradeDebugIntrinsicsToRecords)1627 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1628 bool CanUpgradeDebugIntrinsicsToRecords) {
1629 NewFn = nullptr;
1630 bool Upgraded =
1631 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1632 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1633
1634 // Upgrade intrinsic attributes. This does not change the function.
1635 if (NewFn)
1636 F = NewFn;
1637 if (Intrinsic::ID id = F->getIntrinsicID()) {
1638 // Only do this if the intrinsic signature is valid.
1639 SmallVector<Type *> OverloadTys;
1640 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1641 F->setAttributes(
1642 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1643 }
1644 return Upgraded;
1645 }
1646
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Only the two magic structor arrays are upgraded, and only when there is
  // an initializer to rewrite.
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  // Two-element entries are the old {priority, function} form; anything else
  // is either already current or not a structor array we recognize.
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  // The current element type appends an associated-data pointer:
  // {priority, function, ptr}.
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    // Keep priority and function, null out the new associated-data slot.
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      ConstantPointerNull::get(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  // NOTE: the replacement is created detached (no Module argument here);
  // presumably the caller splices it in for GV — confirm at call sites.
  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
1677
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  // Input is a vector of 64-bit elements; operate on it as bytes (x8).
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Indices >= NumElts select from the second shuffle operand (Op);
        // smaller indices select zero bytes from Res.
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
1711
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  // Input is a vector of 64-bit elements; operate on it as bytes (x8).
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Indices >= NumElts select zero bytes from the second shuffle
        // operand (Res); smaller indices select shifted bytes of Op.
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
1745
getX86MaskVec(IRBuilder<> & Builder,Value * Mask,unsigned NumElts)1746 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1747 unsigned NumElts) {
1748 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1749 llvm::VectorType *MaskTy = FixedVectorType::get(
1750 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1751 Mask = Builder.CreateBitCast(Mask, MaskTy);
1752
1753 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1754 // i8 and we need to extract down to the right number of elements.
1755 if (NumElts <= 4) {
1756 int Indices[4];
1757 for (unsigned i = 0; i != NumElts; ++i)
1758 Indices[i] = i;
1759 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1760 "extract");
1761 }
1762
1763 return Mask;
1764 }
1765
emitX86Select(IRBuilder<> & Builder,Value * Mask,Value * Op0,Value * Op1)1766 static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1767 Value *Op1) {
1768 // If the mask is all ones just emit the first operation.
1769 if (const auto *C = dyn_cast<Constant>(Mask))
1770 if (C->isAllOnesValue())
1771 return Op0;
1772
1773 Mask = getX86MaskVec(Builder, Mask,
1774 cast<FixedVectorType>(Op0->getType())->getNumElements());
1775 return Builder.CreateSelect(Mask, Op0, Op1);
1776 }
1777
// Emit select(mask bit 0, Op0, Op1) for scalar (element-0) x86 operations.
static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                                  Value *Op1) {
  // A constant all-ones mask selects Op0 unconditionally.
  if (const auto *MaskC = dyn_cast<Constant>(Mask))
    if (MaskC->isAllOnesValue())
      return Op0;

  // View the integer mask as a vector of i1 and test its lowest bit.
  unsigned BitWidth = Mask->getType()->getIntegerBitWidth();
  auto *BoolVecTy = FixedVectorType::get(Builder.getInt1Ty(), BitWidth);
  Value *Bit0 = Builder.CreateExtractElement(
      Builder.CreateBitCast(Mask, BoolVecTy), (uint64_t)0);
  return Builder.CreateSelect(Bit0, Op0, Op1);
}
1791
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      // Indices past 16 pull from the second shuffle operand (Op0) unless
      // VALIGN, which wraps within the full vector instead.
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

  // Apply the write-mask, falling back to Passthru for disabled lanes.
  return emitX86Select(Builder, Mask, Align, Passthru);
}
1839
// Upgrade masked VPERMT2/VPERMI2 intrinsics to the unified vpermi2var form
// plus an explicit select for the write-mask.
static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  // Pick the vpermi2var variant matching vector width, element width and
  // element type.
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateIntrinsic(IID, Args);
  // ZeroMask form zeroes disabled lanes; otherwise operand 1 is the passthru.
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1899
upgradeX86BinaryIntrinsics(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)1900 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1901 Intrinsic::ID IID) {
1902 Type *Ty = CI.getType();
1903 Value *Op0 = CI.getOperand(0);
1904 Value *Op1 = CI.getOperand(1);
1905 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1906
1907 if (CI.arg_size() == 4) { // For masked intrinsics.
1908 Value *VecSrc = CI.getOperand(2);
1909 Value *Mask = CI.getOperand(3);
1910 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1911 }
1912 return Res;
1913 }
1914
upgradeX86Rotate(IRBuilder<> & Builder,CallBase & CI,bool IsRotateRight)1915 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1916 bool IsRotateRight) {
1917 Type *Ty = CI.getType();
1918 Value *Src = CI.getArgOperand(0);
1919 Value *Amt = CI.getArgOperand(1);
1920
1921 // Amount may be scalar immediate, in which case create a splat vector.
1922 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1923 // we only care about the lowest log2 bits anyway.
1924 if (Amt->getType() != Ty) {
1925 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1926 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1927 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1928 }
1929
1930 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1931 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1932
1933 if (CI.arg_size() == 4) { // For masked intrinsics.
1934 Value *VecSrc = CI.getOperand(2);
1935 Value *Mask = CI.getOperand(3);
1936 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1937 }
1938 return Res;
1939 }
1940
upgradeX86vpcom(IRBuilder<> & Builder,CallBase & CI,unsigned Imm,bool IsSigned)1941 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1942 bool IsSigned) {
1943 Type *Ty = CI.getType();
1944 Value *LHS = CI.getArgOperand(0);
1945 Value *RHS = CI.getArgOperand(1);
1946
1947 CmpInst::Predicate Pred;
1948 switch (Imm) {
1949 case 0x0:
1950 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1951 break;
1952 case 0x1:
1953 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1954 break;
1955 case 0x2:
1956 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1957 break;
1958 case 0x3:
1959 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1960 break;
1961 case 0x4:
1962 Pred = ICmpInst::ICMP_EQ;
1963 break;
1964 case 0x5:
1965 Pred = ICmpInst::ICMP_NE;
1966 break;
1967 case 0x6:
1968 return Constant::getNullValue(Ty); // FALSE
1969 case 0x7:
1970 return Constant::getAllOnesValue(Ty); // TRUE
1971 default:
1972 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1973 }
1974
1975 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1976 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1977 return Ext;
1978 }
1979
// Upgrade x86 concat-shift (VSHLD/VSHRD) intrinsics to funnel shifts,
// re-applying the write-mask for the masked forms.
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  // fshr takes its concatenated operands in the opposite order.
  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

  unsigned NumArgs = CI.arg_size();
  if (NumArgs >= 4) { // For masked intrinsics.
    // 5-arg form carries an explicit passthru; the 4-arg form uses either
    // zero or the first source depending on ZeroMask.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
                               CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
2012
upgradeMaskedStore(IRBuilder<> & Builder,Value * Ptr,Value * Data,Value * Mask,bool Aligned)2013 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2014 Value *Mask, bool Aligned) {
2015 const Align Alignment =
2016 Aligned
2017 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2018 : Align(1);
2019
2020 // If the mask is all ones just emit a regular store.
2021 if (const auto *C = dyn_cast<Constant>(Mask))
2022 if (C->isAllOnesValue())
2023 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2024
2025 // Convert the mask from an integer type to a vector of i1.
2026 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2027 Mask = getX86MaskVec(Builder, Mask, NumElts);
2028 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2029 }
2030
upgradeMaskedLoad(IRBuilder<> & Builder,Value * Ptr,Value * Passthru,Value * Mask,bool Aligned)2031 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2032 Value *Passthru, Value *Mask, bool Aligned) {
2033 Type *ValTy = Passthru->getType();
2034 const Align Alignment =
2035 Aligned
2036 ? Align(
2037 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2038 8)
2039 : Align(1);
2040
2041 // If the mask is all ones just emit a regular store.
2042 if (const auto *C = dyn_cast<Constant>(Mask))
2043 if (C->isAllOnesValue())
2044 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2045
2046 // Convert the mask from an integer type to a vector of i1.
2047 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2048 Mask = getX86MaskVec(Builder, Mask, NumElts);
2049 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2050 }
2051
upgradeAbs(IRBuilder<> & Builder,CallBase & CI)2052 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2053 Type *Ty = CI.getType();
2054 Value *Op0 = CI.getArgOperand(0);
2055 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2056 {Op0, Builder.getInt1(false)});
2057 if (CI.arg_size() == 3)
2058 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2059 return Res;
2060 }
2061
upgradePMULDQ(IRBuilder<> & Builder,CallBase & CI,bool IsSigned)2062 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2063 Type *Ty = CI.getType();
2064
2065 // Arguments have a vXi32 type so cast to vXi64.
2066 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2067 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2068
2069 if (IsSigned) {
2070 // Shift left then arithmetic shift right.
2071 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2072 LHS = Builder.CreateShl(LHS, ShiftAmt);
2073 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2074 RHS = Builder.CreateShl(RHS, ShiftAmt);
2075 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2076 } else {
2077 // Clear the upper bits.
2078 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2079 LHS = Builder.CreateAnd(LHS, Mask);
2080 RHS = Builder.CreateAnd(RHS, Mask);
2081 }
2082
2083 Value *Res = Builder.CreateMul(LHS, RHS);
2084
2085 if (CI.arg_size() == 4)
2086 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2087
2088 return Res;
2089 }
2090
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  if (Mask) {
    // Skip the AND when the mask is a constant all-ones (it is a no-op).
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Pad narrow results out to 8 lanes, filling the high lanes from the
    // zero vector, so the bitcast below yields at least an i8.
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  // Collapse the i1 vector into a single integer mask value.
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
2113
// Upgrade masked integer compare intrinsics: lower the x86 condition code CC
// to an icmp (or a constant for always-false/always-true) and apply the
// write-mask.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    // CC 3 is the always-false predicate.
    Cmp = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // CC 7 is the always-true predicate.
    Cmp = Constant::getAllOnesValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    // Remaining condition codes map onto icmp predicates, signed or
    // unsigned depending on the intrinsic flavor.
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  // The write-mask is always the final operand.
  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
2144
2145 // Replace a masked intrinsic with an older unmasked intrinsic.
upgradeX86MaskedShift(IRBuilder<> & Builder,CallBase & CI,Intrinsic::ID IID)2146 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2147 Intrinsic::ID IID) {
2148 Value *Rep =
2149 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2150 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2151 }
2152
// Upgrade masked scalar move intrinsics: bit 0 of the i8 mask chooses
// element 0 of B or of Src; the result is A with element 0 replaced.
static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
  Value *A = CI.getArgOperand(0);
  Value *B = CI.getArgOperand(1);
  Value *Src = CI.getArgOperand(2);
  Value *Mask = CI.getArgOperand(3);

  // Only the lowest mask bit participates.
  Value *Bit0 = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cond = Builder.CreateIsNotNull(Bit0);
  Value *FromB = Builder.CreateExtractElement(B, (uint64_t)0);
  Value *FromSrc = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Chosen = Builder.CreateSelect(Cond, FromB, FromSrc);
  return Builder.CreateInsertElement(A, Chosen, (uint64_t)0);
}
2166
upgradeMaskToInt(IRBuilder<> & Builder,CallBase & CI)2167 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2168 Value* Op = CI.getArgOperand(0);
2169 Type* ReturnOp = CI.getType();
2170 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2171 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2172 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2173 }
2174
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  // Map the masked intrinsic name plus vector/element width onto the
  // corresponding unmasked intrinsic ID.
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    // Name[9] distinguishes the 'd' (i32) and 'q' (i64) element variants.
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    // Name[5] distinguishes the 'b' (i8) and 'w' (i16) element variants.
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    // Not one of the recognized masked intrinsics; Rep is left untouched.
    return false;

  // Call the unmasked intrinsic with all operands except the trailing
  // passthru and mask, then re-apply the mask (last operand) with the
  // passthru (second-to-last operand).
  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(IID, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
2412
2413 /// Upgrade comment in call to inline asm that represents an objc retain release
2414 /// marker.
UpgradeInlineAsmString(std::string * AsmStr)2415 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2416 size_t Pos;
2417 if (AsmStr->find("mov\tfp") == 0 &&
2418 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2419 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2420 AsmStr->replace(Pos, 1, ";");
2421 }
2422 }
2423
/// Upgrade a call to a legacy llvm.nvvm.* intrinsic to equivalent generic IR
/// or to its modern NVVM replacement.
///
/// \param Name    intrinsic name with the "llvm.nvvm." prefix already
///                stripped. NOTE: several branches below call
///                Name.consume_front(), which strips the matched prefix from
///                Name in place, so the order of the checks is significant.
/// \param CI      the original call being upgraded.
/// \param F       the deprecated callee; consulted for return-type checks and
///                renamed in the bf16 fallback path.
/// \param Builder IRBuilder positioned at the call site.
/// \returns the replacement value, or nullptr when no upgrade rule matched.
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                                       Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    // Integer absolute value: select(arg >= 0, arg, -arg).
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    // The legacy bf16 abs intrinsics operate on integer bit patterns:
    // bitcast to bfloat / <2 x bfloat>, take nvvm.fabs, bitcast back to the
    // original (integer) result type.
    Type *Ty = (Name == "abs.bf16")
                   ? Builder.getBFloatTy()
                   : FixedVectorType::get(Builder.getBFloatTy(), 2);
    Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    // Map the old per-type fabs names onto the overloaded nvvm.fabs /
    // nvvm.fabs.ftz intrinsics.
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    // Floating-point atomic add becomes a native atomicrmw fadd.
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {
    // Wrapping unsigned inc/dec atomics map to atomicrmw uinc_wrap /
    // udec_wrap.
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
                                                  : AtomicRMWInst::UDecWrap;
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    // Integer max via compare-and-select; a 'u' suffix prefix selects the
    // unsigned comparison.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    // Integer min, mirroring the max lowering above.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    // i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    // Half-to-float conversion via llvm.convert.from.fp16.
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  {Builder.getFloatTy()}, CI->getArgOperand(0),
                                  /*FMFSource=*/nullptr, "h2f");
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    // Same-width int<->float reinterpretations are plain bitcasts.
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    // rotl(x, amt) == fshl(x, x, amt).
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    // 64-bit rotate-left; the legacy shift amount is narrower than i64, so
    // zero-extend it before feeding fshl.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    // 64-bit rotate-right via fshr, again widening the shift amount.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    // Swapping the two 32-bit halves of an i64 is a rotate by 32.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              consumeNVVMPtrAddrSpace(Name)) ||
             (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
              Name.starts_with(".to.gen"))) {
    // Generic<->specific pointer conversions become addrspacecasts.
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    // ldg: load through the global address space, tagged invariant so it can
    // be cached.  Operand 1 carries the alignment as a constant.
    Value *Ptr = CI->getArgOperand(0);
    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
    MDNode *MD = MDNode::get(Builder.getContext(), {});
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    return LD;
  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
    // barrier0 takes no operand (implicit barrier id 0); the other two pass
    // their first operand through.
    Value *Arg =
        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
                                  {}, {Arg});
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
        {CI->getArgOperand(0), CI->getArgOperand(1)});
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
                                  {CI->getArgOperand(0)});
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
                                  {CI->getArgOperand(0), CI->getArgOperand(1)});
  } else {
    // Fallback: bf16 intrinsics that historically used integer operand and
    // return types.  Re-declare the intrinsic with the proper bfloat types
    // and bitcast integer operands/result across the new call.
    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
      SmallVector<Value *, 2> Args;
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(I)->getType();
        Args.push_back(
            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(Arg, NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
    }
  }

  return Rep;
}
2574
upgradeX86IntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)2575 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2576 IRBuilder<> &Builder) {
2577 LLVMContext &C = F->getContext();
2578 Value *Rep = nullptr;
2579
2580 if (Name.starts_with("sse4a.movnt.")) {
2581 SmallVector<Metadata *, 1> Elts;
2582 Elts.push_back(
2583 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2584 MDNode *Node = MDNode::get(C, Elts);
2585
2586 Value *Arg0 = CI->getArgOperand(0);
2587 Value *Arg1 = CI->getArgOperand(1);
2588
2589 // Nontemporal (unaligned) store of the 0'th element of the float/double
2590 // vector.
2591 Value *Extract =
2592 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2593
2594 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2595 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2596 } else if (Name.starts_with("avx.movnt.") ||
2597 Name.starts_with("avx512.storent.")) {
2598 SmallVector<Metadata *, 1> Elts;
2599 Elts.push_back(
2600 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2601 MDNode *Node = MDNode::get(C, Elts);
2602
2603 Value *Arg0 = CI->getArgOperand(0);
2604 Value *Arg1 = CI->getArgOperand(1);
2605
2606 StoreInst *SI = Builder.CreateAlignedStore(
2607 Arg1, Arg0,
2608 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2609 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2610 } else if (Name == "sse2.storel.dq") {
2611 Value *Arg0 = CI->getArgOperand(0);
2612 Value *Arg1 = CI->getArgOperand(1);
2613
2614 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2615 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2616 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2617 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2618 } else if (Name.starts_with("sse.storeu.") ||
2619 Name.starts_with("sse2.storeu.") ||
2620 Name.starts_with("avx.storeu.")) {
2621 Value *Arg0 = CI->getArgOperand(0);
2622 Value *Arg1 = CI->getArgOperand(1);
2623 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2624 } else if (Name == "avx512.mask.store.ss") {
2625 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2626 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2627 Mask, false);
2628 } else if (Name.starts_with("avx512.mask.store")) {
2629 // "avx512.mask.storeu." or "avx512.mask.store."
2630 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2631 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2632 CI->getArgOperand(2), Aligned);
2633 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2634 // Upgrade packed integer vector compare intrinsics to compare instructions.
2635 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2636 bool CmpEq = Name[9] == 'e';
2637 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2638 CI->getArgOperand(0), CI->getArgOperand(1));
2639 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2640 } else if (Name.starts_with("avx512.broadcastm")) {
2641 Type *ExtTy = Type::getInt32Ty(C);
2642 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2643 ExtTy = Type::getInt64Ty(C);
2644 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2645 ExtTy->getPrimitiveSizeInBits();
2646 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2647 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2648 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2649 Value *Vec = CI->getArgOperand(0);
2650 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2651 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2652 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2653 } else if (Name.starts_with("avx.sqrt.p") ||
2654 Name.starts_with("sse2.sqrt.p") ||
2655 Name.starts_with("sse.sqrt.p")) {
2656 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2657 {CI->getArgOperand(0)});
2658 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2659 if (CI->arg_size() == 4 &&
2660 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2661 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2662 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2663 : Intrinsic::x86_avx512_sqrt_pd_512;
2664
2665 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2666 Rep = Builder.CreateIntrinsic(IID, Args);
2667 } else {
2668 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2669 {CI->getArgOperand(0)});
2670 }
2671 Rep =
2672 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2673 } else if (Name.starts_with("avx512.ptestm") ||
2674 Name.starts_with("avx512.ptestnm")) {
2675 Value *Op0 = CI->getArgOperand(0);
2676 Value *Op1 = CI->getArgOperand(1);
2677 Value *Mask = CI->getArgOperand(2);
2678 Rep = Builder.CreateAnd(Op0, Op1);
2679 llvm::Type *Ty = Op0->getType();
2680 Value *Zero = llvm::Constant::getNullValue(Ty);
2681 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2682 ? ICmpInst::ICMP_NE
2683 : ICmpInst::ICMP_EQ;
2684 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2685 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2686 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2687 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2688 ->getNumElements();
2689 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2690 Rep =
2691 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2692 } else if (Name.starts_with("avx512.kunpck")) {
2693 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2694 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2695 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2696 int Indices[64];
2697 for (unsigned i = 0; i != NumElts; ++i)
2698 Indices[i] = i;
2699
2700 // First extract half of each vector. This gives better codegen than
2701 // doing it in a single shuffle.
2702 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2703 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2704 // Concat the vectors.
2705 // NOTE: Operands have to be swapped to match intrinsic definition.
2706 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2707 Rep = Builder.CreateBitCast(Rep, CI->getType());
2708 } else if (Name == "avx512.kand.w") {
2709 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2710 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2711 Rep = Builder.CreateAnd(LHS, RHS);
2712 Rep = Builder.CreateBitCast(Rep, CI->getType());
2713 } else if (Name == "avx512.kandn.w") {
2714 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2715 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2716 LHS = Builder.CreateNot(LHS);
2717 Rep = Builder.CreateAnd(LHS, RHS);
2718 Rep = Builder.CreateBitCast(Rep, CI->getType());
2719 } else if (Name == "avx512.kor.w") {
2720 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2721 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2722 Rep = Builder.CreateOr(LHS, RHS);
2723 Rep = Builder.CreateBitCast(Rep, CI->getType());
2724 } else if (Name == "avx512.kxor.w") {
2725 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2726 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2727 Rep = Builder.CreateXor(LHS, RHS);
2728 Rep = Builder.CreateBitCast(Rep, CI->getType());
2729 } else if (Name == "avx512.kxnor.w") {
2730 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2731 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2732 LHS = Builder.CreateNot(LHS);
2733 Rep = Builder.CreateXor(LHS, RHS);
2734 Rep = Builder.CreateBitCast(Rep, CI->getType());
2735 } else if (Name == "avx512.knot.w") {
2736 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2737 Rep = Builder.CreateNot(Rep);
2738 Rep = Builder.CreateBitCast(Rep, CI->getType());
2739 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2740 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2741 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2742 Rep = Builder.CreateOr(LHS, RHS);
2743 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2744 Value *C;
2745 if (Name[14] == 'c')
2746 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2747 else
2748 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2749 Rep = Builder.CreateICmpEQ(Rep, C);
2750 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2751 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2752 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2753 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2754 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2755 Type *I32Ty = Type::getInt32Ty(C);
2756 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2757 ConstantInt::get(I32Ty, 0));
2758 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2759 ConstantInt::get(I32Ty, 0));
2760 Value *EltOp;
2761 if (Name.contains(".add."))
2762 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2763 else if (Name.contains(".sub."))
2764 EltOp = Builder.CreateFSub(Elt0, Elt1);
2765 else if (Name.contains(".mul."))
2766 EltOp = Builder.CreateFMul(Elt0, Elt1);
2767 else
2768 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2769 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2770 ConstantInt::get(I32Ty, 0));
2771 } else if (Name.starts_with("avx512.mask.pcmp")) {
2772 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2773 bool CmpEq = Name[16] == 'e';
2774 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2775 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2776 Type *OpTy = CI->getArgOperand(0)->getType();
2777 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2778 Intrinsic::ID IID;
2779 switch (VecWidth) {
2780 default:
2781 llvm_unreachable("Unexpected intrinsic");
2782 case 128:
2783 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2784 break;
2785 case 256:
2786 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2787 break;
2788 case 512:
2789 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2790 break;
2791 }
2792
2793 Rep =
2794 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2795 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2796 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2797 Type *OpTy = CI->getArgOperand(0)->getType();
2798 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2799 unsigned EltWidth = OpTy->getScalarSizeInBits();
2800 Intrinsic::ID IID;
2801 if (VecWidth == 128 && EltWidth == 32)
2802 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2803 else if (VecWidth == 256 && EltWidth == 32)
2804 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2805 else if (VecWidth == 512 && EltWidth == 32)
2806 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2807 else if (VecWidth == 128 && EltWidth == 64)
2808 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2809 else if (VecWidth == 256 && EltWidth == 64)
2810 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2811 else if (VecWidth == 512 && EltWidth == 64)
2812 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2813 else
2814 llvm_unreachable("Unexpected intrinsic");
2815
2816 Rep =
2817 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2818 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2819 } else if (Name.starts_with("avx512.cmp.p")) {
2820 SmallVector<Value *, 4> Args(CI->args());
2821 Type *OpTy = Args[0]->getType();
2822 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2823 unsigned EltWidth = OpTy->getScalarSizeInBits();
2824 Intrinsic::ID IID;
2825 if (VecWidth == 128 && EltWidth == 32)
2826 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2827 else if (VecWidth == 256 && EltWidth == 32)
2828 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2829 else if (VecWidth == 512 && EltWidth == 32)
2830 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2831 else if (VecWidth == 128 && EltWidth == 64)
2832 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2833 else if (VecWidth == 256 && EltWidth == 64)
2834 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2835 else if (VecWidth == 512 && EltWidth == 64)
2836 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2837 else
2838 llvm_unreachable("Unexpected intrinsic");
2839
2840 Value *Mask = Constant::getAllOnesValue(CI->getType());
2841 if (VecWidth == 512)
2842 std::swap(Mask, Args.back());
2843 Args.push_back(Mask);
2844
2845 Rep = Builder.CreateIntrinsic(IID, Args);
2846 } else if (Name.starts_with("avx512.mask.cmp.")) {
2847 // Integer compare intrinsics.
2848 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2849 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2850 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2851 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2852 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2853 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2854 Name.starts_with("avx512.cvtw2mask.") ||
2855 Name.starts_with("avx512.cvtd2mask.") ||
2856 Name.starts_with("avx512.cvtq2mask.")) {
2857 Value *Op = CI->getArgOperand(0);
2858 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2859 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2860 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2861 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2862 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2863 Name.starts_with("avx512.mask.pabs")) {
2864 Rep = upgradeAbs(Builder, *CI);
2865 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2866 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2867 Name.starts_with("avx512.mask.pmaxs")) {
2868 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2869 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2870 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2871 Name.starts_with("avx512.mask.pmaxu")) {
2872 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2873 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2874 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2875 Name.starts_with("avx512.mask.pmins")) {
2876 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2877 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2878 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2879 Name.starts_with("avx512.mask.pminu")) {
2880 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2881 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2882 Name == "avx512.pmulu.dq.512" ||
2883 Name.starts_with("avx512.mask.pmulu.dq.")) {
2884 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2885 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2886 Name == "avx512.pmul.dq.512" ||
2887 Name.starts_with("avx512.mask.pmul.dq.")) {
2888 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2889 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2890 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2891 Rep =
2892 Builder.CreateSIToFP(CI->getArgOperand(1),
2893 cast<VectorType>(CI->getType())->getElementType());
2894 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2895 } else if (Name == "avx512.cvtusi2sd") {
2896 Rep =
2897 Builder.CreateUIToFP(CI->getArgOperand(1),
2898 cast<VectorType>(CI->getType())->getElementType());
2899 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2900 } else if (Name == "sse2.cvtss2sd") {
2901 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2902 Rep = Builder.CreateFPExt(
2903 Rep, cast<VectorType>(CI->getType())->getElementType());
2904 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2905 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2906 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2907 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2908 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2909 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2910 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2911 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2912 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2913 Name == "avx512.mask.cvtqq2ps.256" ||
2914 Name == "avx512.mask.cvtqq2ps.512" ||
2915 Name == "avx512.mask.cvtuqq2ps.256" ||
2916 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2917 Name == "avx.cvt.ps2.pd.256" ||
2918 Name == "avx512.mask.cvtps2pd.128" ||
2919 Name == "avx512.mask.cvtps2pd.256") {
2920 auto *DstTy = cast<FixedVectorType>(CI->getType());
2921 Rep = CI->getArgOperand(0);
2922 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2923
2924 unsigned NumDstElts = DstTy->getNumElements();
2925 if (NumDstElts < SrcTy->getNumElements()) {
2926 assert(NumDstElts == 2 && "Unexpected vector size");
2927 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2928 }
2929
2930 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2931 bool IsUnsigned = Name.contains("cvtu");
2932 if (IsPS2PD)
2933 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2934 else if (CI->arg_size() == 4 &&
2935 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2936 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2937 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2938 : Intrinsic::x86_avx512_sitofp_round;
2939 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2940 {Rep, CI->getArgOperand(3)});
2941 } else {
2942 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2943 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2944 }
2945
2946 if (CI->arg_size() >= 3)
2947 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2948 CI->getArgOperand(1));
2949 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2950 Name.starts_with("vcvtph2ps.")) {
2951 auto *DstTy = cast<FixedVectorType>(CI->getType());
2952 Rep = CI->getArgOperand(0);
2953 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2954 unsigned NumDstElts = DstTy->getNumElements();
2955 if (NumDstElts != SrcTy->getNumElements()) {
2956 assert(NumDstElts == 4 && "Unexpected vector size");
2957 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2958 }
2959 Rep = Builder.CreateBitCast(
2960 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2961 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2962 if (CI->arg_size() >= 3)
2963 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2964 CI->getArgOperand(1));
2965 } else if (Name.starts_with("avx512.mask.load")) {
2966 // "avx512.mask.loadu." or "avx512.mask.load."
2967 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2968 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2969 CI->getArgOperand(2), Aligned);
2970 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2971 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2972 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2973 ResultTy->getNumElements());
2974
2975 Rep = Builder.CreateIntrinsic(
2976 Intrinsic::masked_expandload, ResultTy,
2977 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
2978 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2979 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2980 Value *MaskVec =
2981 getX86MaskVec(Builder, CI->getArgOperand(2),
2982 cast<FixedVectorType>(ResultTy)->getNumElements());
2983
2984 Rep = Builder.CreateIntrinsic(
2985 Intrinsic::masked_compressstore, ResultTy,
2986 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
2987 } else if (Name.starts_with("avx512.mask.compress.") ||
2988 Name.starts_with("avx512.mask.expand.")) {
2989 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2990
2991 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2992 ResultTy->getNumElements());
2993
2994 bool IsCompress = Name[12] == 'c';
2995 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2996 : Intrinsic::x86_avx512_mask_expand;
2997 Rep = Builder.CreateIntrinsic(
2998 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2999 } else if (Name.starts_with("xop.vpcom")) {
3000 bool IsSigned;
3001 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3002 Name.ends_with("uq"))
3003 IsSigned = false;
3004 else if (Name.ends_with("b") || Name.ends_with("w") ||
3005 Name.ends_with("d") || Name.ends_with("q"))
3006 IsSigned = true;
3007 else
3008 llvm_unreachable("Unknown suffix");
3009
3010 unsigned Imm;
3011 if (CI->arg_size() == 3) {
3012 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3013 } else {
3014 Name = Name.substr(9); // strip off "xop.vpcom"
3015 if (Name.starts_with("lt"))
3016 Imm = 0;
3017 else if (Name.starts_with("le"))
3018 Imm = 1;
3019 else if (Name.starts_with("gt"))
3020 Imm = 2;
3021 else if (Name.starts_with("ge"))
3022 Imm = 3;
3023 else if (Name.starts_with("eq"))
3024 Imm = 4;
3025 else if (Name.starts_with("ne"))
3026 Imm = 5;
3027 else if (Name.starts_with("false"))
3028 Imm = 6;
3029 else if (Name.starts_with("true"))
3030 Imm = 7;
3031 else
3032 llvm_unreachable("Unknown condition");
3033 }
3034
3035 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3036 } else if (Name.starts_with("xop.vpcmov")) {
3037 Value *Sel = CI->getArgOperand(2);
3038 Value *NotSel = Builder.CreateNot(Sel);
3039 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3040 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3041 Rep = Builder.CreateOr(Sel0, Sel1);
3042 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3043 Name.starts_with("avx512.mask.prol")) {
3044 Rep = upgradeX86Rotate(Builder, *CI, false);
3045 } else if (Name.starts_with("avx512.pror") ||
3046 Name.starts_with("avx512.mask.pror")) {
3047 Rep = upgradeX86Rotate(Builder, *CI, true);
3048 } else if (Name.starts_with("avx512.vpshld.") ||
3049 Name.starts_with("avx512.mask.vpshld") ||
3050 Name.starts_with("avx512.maskz.vpshld")) {
3051 bool ZeroMask = Name[11] == 'z';
3052 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3053 } else if (Name.starts_with("avx512.vpshrd.") ||
3054 Name.starts_with("avx512.mask.vpshrd") ||
3055 Name.starts_with("avx512.maskz.vpshrd")) {
3056 bool ZeroMask = Name[11] == 'z';
3057 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3058 } else if (Name == "sse42.crc32.64.8") {
3059 Value *Trunc0 =
3060 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3061 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3062 {Trunc0, CI->getArgOperand(1)});
3063 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3064 } else if (Name.starts_with("avx.vbroadcast.s") ||
3065 Name.starts_with("avx512.vbroadcast.s")) {
3066 // Replace broadcasts with a series of insertelements.
3067 auto *VecTy = cast<FixedVectorType>(CI->getType());
3068 Type *EltTy = VecTy->getElementType();
3069 unsigned EltNum = VecTy->getNumElements();
3070 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3071 Type *I32Ty = Type::getInt32Ty(C);
3072 Rep = PoisonValue::get(VecTy);
3073 for (unsigned I = 0; I < EltNum; ++I)
3074 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3075 } else if (Name.starts_with("sse41.pmovsx") ||
3076 Name.starts_with("sse41.pmovzx") ||
3077 Name.starts_with("avx2.pmovsx") ||
3078 Name.starts_with("avx2.pmovzx") ||
3079 Name.starts_with("avx512.mask.pmovsx") ||
3080 Name.starts_with("avx512.mask.pmovzx")) {
3081 auto *DstTy = cast<FixedVectorType>(CI->getType());
3082 unsigned NumDstElts = DstTy->getNumElements();
3083
3084 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3085 SmallVector<int, 8> ShuffleMask(NumDstElts);
3086 for (unsigned i = 0; i != NumDstElts; ++i)
3087 ShuffleMask[i] = i;
3088
3089 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3090
3091 bool DoSext = Name.contains("pmovsx");
3092 Rep =
3093 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3094 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3095 if (CI->arg_size() == 3)
3096 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3097 CI->getArgOperand(1));
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    // Replace the masked truncating moves with a plain trunc to the
    // passthru operand's type, then blend with the passthru under the mask.
    Type *Ty = CI->getArgOperand(1)->getType();
    Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3106 } else if (Name.starts_with("avx.vbroadcastf128") ||
3107 Name == "avx2.vbroadcasti128") {
3108 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3109 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3110 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3111 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3112 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3113 if (NumSrcElts == 2)
3114 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3115 else
3116 Rep = Builder.CreateShuffleVector(Load,
3117 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3118 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3119 Name.starts_with("avx512.mask.shuf.f")) {
3120 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3121 Type *VT = CI->getType();
3122 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3123 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3124 unsigned ControlBitsMask = NumLanes - 1;
3125 unsigned NumControlBits = NumLanes / 2;
3126 SmallVector<int, 8> ShuffleMask(0);
3127
3128 for (unsigned l = 0; l != NumLanes; ++l) {
3129 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3130 // We actually need the other source.
3131 if (l >= NumLanes / 2)
3132 LaneMask += NumLanes;
3133 for (unsigned i = 0; i != NumElementsInLane; ++i)
3134 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3135 }
3136 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3137 CI->getArgOperand(1), ShuffleMask);
3138 Rep =
3139 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3140 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3141 Name.starts_with("avx512.mask.broadcasti")) {
3142 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3143 ->getNumElements();
3144 unsigned NumDstElts =
3145 cast<FixedVectorType>(CI->getType())->getNumElements();
3146
3147 SmallVector<int, 8> ShuffleMask(NumDstElts);
3148 for (unsigned i = 0; i != NumDstElts; ++i)
3149 ShuffleMask[i] = i % NumSrcElts;
3150
3151 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3152 CI->getArgOperand(0), ShuffleMask);
3153 Rep =
3154 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3155 } else if (Name.starts_with("avx2.pbroadcast") ||
3156 Name.starts_with("avx2.vbroadcast") ||
3157 Name.starts_with("avx512.pbroadcast") ||
3158 Name.starts_with("avx512.mask.broadcast.s")) {
3159 // Replace vp?broadcasts with a vector shuffle.
3160 Value *Op = CI->getArgOperand(0);
3161 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3162 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3163 SmallVector<int, 8> M;
3164 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3165 Rep = Builder.CreateShuffleVector(Op, M);
3166
3167 if (CI->arg_size() == 3)
3168 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3169 CI->getArgOperand(1));
3170 } else if (Name.starts_with("sse2.padds.") ||
3171 Name.starts_with("avx2.padds.") ||
3172 Name.starts_with("avx512.padds.") ||
3173 Name.starts_with("avx512.mask.padds.")) {
3174 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3175 } else if (Name.starts_with("sse2.psubs.") ||
3176 Name.starts_with("avx2.psubs.") ||
3177 Name.starts_with("avx512.psubs.") ||
3178 Name.starts_with("avx512.mask.psubs.")) {
3179 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3180 } else if (Name.starts_with("sse2.paddus.") ||
3181 Name.starts_with("avx2.paddus.") ||
3182 Name.starts_with("avx512.mask.paddus.")) {
3183 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3184 } else if (Name.starts_with("sse2.psubus.") ||
3185 Name.starts_with("avx2.psubus.") ||
3186 Name.starts_with("avx512.mask.psubus.")) {
3187 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3188 } else if (Name.starts_with("avx512.mask.palignr.")) {
3189 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3190 CI->getArgOperand(1), CI->getArgOperand(2),
3191 CI->getArgOperand(3), CI->getArgOperand(4),
3192 false);
3193 } else if (Name.starts_with("avx512.mask.valign.")) {
3194 Rep = upgradeX86ALIGNIntrinsics(
3195 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3196 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3197 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3198 // 128/256-bit shift left specified in bits.
3199 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3200 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3201 Shift / 8); // Shift is in bits.
3202 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3203 // 128/256-bit shift right specified in bits.
3204 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3205 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3206 Shift / 8); // Shift is in bits.
3207 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3208 Name == "avx512.psll.dq.512") {
3209 // 128/256/512-bit shift left specified in bytes.
3210 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3211 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3212 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3213 Name == "avx512.psrl.dq.512") {
3214 // 128/256/512-bit shift right specified in bytes.
3215 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3216 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3217 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3218 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3219 Name.starts_with("avx2.pblendd.")) {
3220 Value *Op0 = CI->getArgOperand(0);
3221 Value *Op1 = CI->getArgOperand(1);
3222 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3223 auto *VecTy = cast<FixedVectorType>(CI->getType());
3224 unsigned NumElts = VecTy->getNumElements();
3225
3226 SmallVector<int, 16> Idxs(NumElts);
3227 for (unsigned i = 0; i != NumElts; ++i)
3228 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3229
3230 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  } else if (Name.starts_with("avx.vinsertf128.") ||
             Name == "avx2.vinserti128" ||
             Name.starts_with("avx512.mask.insert")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op1->getType())->getNumElements();
    // Scale is the number of subvector positions in the destination.
    unsigned Scale = DstNumElts / SrcNumElts;

    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;

    // Extend the second operand into a vector the size of the destination.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);

    // Insert the second operand into the first operand.

    // Note that there is no guarantee that instruction lowering will actually
    // produce a vinsertf128 instruction for the created shuffles. In
    // particular, the 0 immediate case involves no lane changes, so it can
    // be handled as a blend.

    // Example of shuffle mask for 32-bit elements:
    // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >

    // First fill with identity mask.
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    // Then replace the elements where we need to insert.
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

    // If the intrinsic has a mask operand, handle that.
    if (CI->arg_size() == 5)
      Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
3277 } else if (Name.starts_with("avx.vextractf128.") ||
3278 Name == "avx2.vextracti128" ||
3279 Name.starts_with("avx512.mask.vextract")) {
3280 Value *Op0 = CI->getArgOperand(0);
3281 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3282 unsigned DstNumElts =
3283 cast<FixedVectorType>(CI->getType())->getNumElements();
3284 unsigned SrcNumElts =
3285 cast<FixedVectorType>(Op0->getType())->getNumElements();
3286 unsigned Scale = SrcNumElts / DstNumElts;
3287
3288 // Mask off the high bits of the immediate value; hardware ignores those.
3289 Imm = Imm % Scale;
3290
3291 // Get indexes for the subvector of the input vector.
3292 SmallVector<int, 8> Idxs(DstNumElts);
3293 for (unsigned i = 0; i != DstNumElts; ++i) {
3294 Idxs[i] = i + (Imm * DstNumElts);
3295 }
3296 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3297
3298 // If the intrinsic has a mask operand, handle that.
3299 if (CI->arg_size() == 4)
3300 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3301 CI->getArgOperand(2));
3302 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3303 Name.starts_with("avx512.mask.perm.di.")) {
3304 Value *Op0 = CI->getArgOperand(0);
3305 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3306 auto *VecTy = cast<FixedVectorType>(CI->getType());
3307 unsigned NumElts = VecTy->getNumElements();
3308
3309 SmallVector<int, 8> Idxs(NumElts);
3310 for (unsigned i = 0; i != NumElts; ++i)
3311 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3312
3313 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3314
3315 if (CI->arg_size() == 4)
3316 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3317 CI->getArgOperand(2));
3318 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3319 // The immediate permute control byte looks like this:
3320 // [1:0] - select 128 bits from sources for low half of destination
3321 // [2] - ignore
3322 // [3] - zero low half of destination
3323 // [5:4] - select 128 bits from sources for high half of destination
3324 // [6] - ignore
3325 // [7] - zero high half of destination
3326
3327 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3328
3329 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3330 unsigned HalfSize = NumElts / 2;
3331 SmallVector<int, 8> ShuffleMask(NumElts);
3332
3333 // Determine which operand(s) are actually in use for this instruction.
3334 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3335 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3336
3337 // If needed, replace operands based on zero mask.
3338 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3339 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3340
3341 // Permute low half of result.
3342 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3343 for (unsigned i = 0; i < HalfSize; ++i)
3344 ShuffleMask[i] = StartIndex + i;
3345
3346 // Permute high half of result.
3347 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3348 for (unsigned i = 0; i < HalfSize; ++i)
3349 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3350
3351 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3352
3353 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3354 Name.starts_with("avx512.mask.vpermil.p") ||
3355 Name.starts_with("avx512.mask.pshuf.d.")) {
3356 Value *Op0 = CI->getArgOperand(0);
3357 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3358 auto *VecTy = cast<FixedVectorType>(CI->getType());
3359 unsigned NumElts = VecTy->getNumElements();
3360 // Calculate the size of each index in the immediate.
3361 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3362 unsigned IdxMask = ((1 << IdxSize) - 1);
3363
3364 SmallVector<int, 8> Idxs(NumElts);
3365 // Lookup the bits for this element, wrapping around the immediate every
3366 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3367 // to offset by the first index of each group.
3368 for (unsigned i = 0; i != NumElts; ++i)
3369 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3370
3371 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3372
3373 if (CI->arg_size() == 4)
3374 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3375 CI->getArgOperand(2));
3376 } else if (Name == "sse2.pshufl.w" ||
3377 Name.starts_with("avx512.mask.pshufl.w.")) {
3378 Value *Op0 = CI->getArgOperand(0);
3379 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3380 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3381
3382 SmallVector<int, 16> Idxs(NumElts);
3383 for (unsigned l = 0; l != NumElts; l += 8) {
3384 for (unsigned i = 0; i != 4; ++i)
3385 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3386 for (unsigned i = 4; i != 8; ++i)
3387 Idxs[i + l] = i + l;
3388 }
3389
3390 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3391
3392 if (CI->arg_size() == 4)
3393 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3394 CI->getArgOperand(2));
3395 } else if (Name == "sse2.pshufh.w" ||
3396 Name.starts_with("avx512.mask.pshufh.w.")) {
3397 Value *Op0 = CI->getArgOperand(0);
3398 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3399 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3400
3401 SmallVector<int, 16> Idxs(NumElts);
3402 for (unsigned l = 0; l != NumElts; l += 8) {
3403 for (unsigned i = 0; i != 4; ++i)
3404 Idxs[i + l] = i + l;
3405 for (unsigned i = 0; i != 4; ++i)
3406 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3407 }
3408
3409 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3410
3411 if (CI->arg_size() == 4)
3412 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3413 CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.shuf.p")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
    unsigned HalfLaneElts = NumLaneElts / 2;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are half way through the lane switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Now select the specific element by adding HalfLaneElts bits from
      // the immediate, wrapping around the immediate every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep =
        emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
  } else if (Name.starts_with("avx512.mask.movddup") ||
             Name.starts_with("avx512.mask.movshdup") ||
             Name.starts_with("avx512.mask.movsldup")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    // movshdup duplicates the odd-indexed element of each pair (Offset 1);
    // movddup and movsldup duplicate the even-indexed element (Offset 0).
    unsigned Offset = 0;
    if (Name.starts_with("avx512.mask.movshdup."))
      Offset = 1;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3461 } else if (Name.starts_with("avx512.mask.punpckl") ||
3462 Name.starts_with("avx512.mask.unpckl.")) {
3463 Value *Op0 = CI->getArgOperand(0);
3464 Value *Op1 = CI->getArgOperand(1);
3465 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3466 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3467
3468 SmallVector<int, 64> Idxs(NumElts);
3469 for (int l = 0; l != NumElts; l += NumLaneElts)
3470 for (int i = 0; i != NumLaneElts; ++i)
3471 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3472
3473 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3474
3475 Rep =
3476 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3477 } else if (Name.starts_with("avx512.mask.punpckh") ||
3478 Name.starts_with("avx512.mask.unpckh.")) {
3479 Value *Op0 = CI->getArgOperand(0);
3480 Value *Op1 = CI->getArgOperand(1);
3481 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3482 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3483
3484 SmallVector<int, 64> Idxs(NumElts);
3485 for (int l = 0; l != NumElts; l += NumLaneElts)
3486 for (int i = 0; i != NumLaneElts; ++i)
3487 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3488
3489 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3490
3491 Rep =
3492 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3493 } else if (Name.starts_with("avx512.mask.and.") ||
3494 Name.starts_with("avx512.mask.pand.")) {
3495 VectorType *FTy = cast<VectorType>(CI->getType());
3496 VectorType *ITy = VectorType::getInteger(FTy);
3497 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3498 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3499 Rep = Builder.CreateBitCast(Rep, FTy);
3500 Rep =
3501 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3502 } else if (Name.starts_with("avx512.mask.andn.") ||
3503 Name.starts_with("avx512.mask.pandn.")) {
3504 VectorType *FTy = cast<VectorType>(CI->getType());
3505 VectorType *ITy = VectorType::getInteger(FTy);
3506 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3507 Rep = Builder.CreateAnd(Rep,
3508 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3509 Rep = Builder.CreateBitCast(Rep, FTy);
3510 Rep =
3511 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3512 } else if (Name.starts_with("avx512.mask.or.") ||
3513 Name.starts_with("avx512.mask.por.")) {
3514 VectorType *FTy = cast<VectorType>(CI->getType());
3515 VectorType *ITy = VectorType::getInteger(FTy);
3516 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3517 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3518 Rep = Builder.CreateBitCast(Rep, FTy);
3519 Rep =
3520 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3521 } else if (Name.starts_with("avx512.mask.xor.") ||
3522 Name.starts_with("avx512.mask.pxor.")) {
3523 VectorType *FTy = cast<VectorType>(CI->getType());
3524 VectorType *ITy = VectorType::getInteger(FTy);
3525 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3526 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3527 Rep = Builder.CreateBitCast(Rep, FTy);
3528 Rep =
3529 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3530 } else if (Name.starts_with("avx512.mask.padd.")) {
3531 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3532 Rep =
3533 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3534 } else if (Name.starts_with("avx512.mask.psub.")) {
3535 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3536 Rep =
3537 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3538 } else if (Name.starts_with("avx512.mask.pmull.")) {
3539 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3540 Rep =
3541 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3542 } else if (Name.starts_with("avx512.mask.add.p")) {
3543 if (Name.ends_with(".512")) {
3544 Intrinsic::ID IID;
3545 if (Name[17] == 's')
3546 IID = Intrinsic::x86_avx512_add_ps_512;
3547 else
3548 IID = Intrinsic::x86_avx512_add_pd_512;
3549
3550 Rep = Builder.CreateIntrinsic(
3551 IID,
3552 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3553 } else {
3554 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3555 }
3556 Rep =
3557 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3558 } else if (Name.starts_with("avx512.mask.div.p")) {
3559 if (Name.ends_with(".512")) {
3560 Intrinsic::ID IID;
3561 if (Name[17] == 's')
3562 IID = Intrinsic::x86_avx512_div_ps_512;
3563 else
3564 IID = Intrinsic::x86_avx512_div_pd_512;
3565
3566 Rep = Builder.CreateIntrinsic(
3567 IID,
3568 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3569 } else {
3570 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3571 }
3572 Rep =
3573 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3574 } else if (Name.starts_with("avx512.mask.mul.p")) {
3575 if (Name.ends_with(".512")) {
3576 Intrinsic::ID IID;
3577 if (Name[17] == 's')
3578 IID = Intrinsic::x86_avx512_mul_ps_512;
3579 else
3580 IID = Intrinsic::x86_avx512_mul_pd_512;
3581
3582 Rep = Builder.CreateIntrinsic(
3583 IID,
3584 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3585 } else {
3586 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3587 }
3588 Rep =
3589 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3590 } else if (Name.starts_with("avx512.mask.sub.p")) {
3591 if (Name.ends_with(".512")) {
3592 Intrinsic::ID IID;
3593 if (Name[17] == 's')
3594 IID = Intrinsic::x86_avx512_sub_ps_512;
3595 else
3596 IID = Intrinsic::x86_avx512_sub_pd_512;
3597
3598 Rep = Builder.CreateIntrinsic(
3599 IID,
3600 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3601 } else {
3602 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3603 }
3604 Rep =
3605 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3606 } else if ((Name.starts_with("avx512.mask.max.p") ||
3607 Name.starts_with("avx512.mask.min.p")) &&
3608 Name.drop_front(18) == ".512") {
3609 bool IsDouble = Name[17] == 'd';
3610 bool IsMin = Name[13] == 'i';
3611 static const Intrinsic::ID MinMaxTbl[2][2] = {
3612 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3613 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3614 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3615
3616 Rep = Builder.CreateIntrinsic(
3617 IID,
3618 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3619 Rep =
3620 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3621 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3622 Rep =
3623 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3624 {CI->getArgOperand(0), Builder.getInt1(false)});
3625 Rep =
3626 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3627 } else if (Name.starts_with("avx512.mask.psll")) {
3628 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3629 bool IsVariable = Name[16] == 'v';
3630 char Size = Name[16] == '.' ? Name[17]
3631 : Name[17] == '.' ? Name[18]
3632 : Name[18] == '.' ? Name[19]
3633 : Name[20];
3634
3635 Intrinsic::ID IID;
3636 if (IsVariable && Name[17] != '.') {
3637 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3638 IID = Intrinsic::x86_avx2_psllv_q;
3639 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3640 IID = Intrinsic::x86_avx2_psllv_q_256;
3641 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3642 IID = Intrinsic::x86_avx2_psllv_d;
3643 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3644 IID = Intrinsic::x86_avx2_psllv_d_256;
3645 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3646 IID = Intrinsic::x86_avx512_psllv_w_128;
3647 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3648 IID = Intrinsic::x86_avx512_psllv_w_256;
3649 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3650 IID = Intrinsic::x86_avx512_psllv_w_512;
3651 else
3652 llvm_unreachable("Unexpected size");
3653 } else if (Name.ends_with(".128")) {
3654 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3655 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3656 : Intrinsic::x86_sse2_psll_d;
3657 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3658 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3659 : Intrinsic::x86_sse2_psll_q;
3660 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3661 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3662 : Intrinsic::x86_sse2_psll_w;
3663 else
3664 llvm_unreachable("Unexpected size");
3665 } else if (Name.ends_with(".256")) {
3666 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3667 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3668 : Intrinsic::x86_avx2_psll_d;
3669 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3670 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3671 : Intrinsic::x86_avx2_psll_q;
3672 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3673 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3674 : Intrinsic::x86_avx2_psll_w;
3675 else
3676 llvm_unreachable("Unexpected size");
3677 } else {
3678 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3679 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3680 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3681 : Intrinsic::x86_avx512_psll_d_512;
3682 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3683 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3684 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3685 : Intrinsic::x86_avx512_psll_q_512;
3686 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3687 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3688 : Intrinsic::x86_avx512_psll_w_512;
3689 else
3690 llvm_unreachable("Unexpected size");
3691 }
3692
3693 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3694 } else if (Name.starts_with("avx512.mask.psrl")) {
3695 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3696 bool IsVariable = Name[16] == 'v';
3697 char Size = Name[16] == '.' ? Name[17]
3698 : Name[17] == '.' ? Name[18]
3699 : Name[18] == '.' ? Name[19]
3700 : Name[20];
3701
3702 Intrinsic::ID IID;
3703 if (IsVariable && Name[17] != '.') {
3704 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3705 IID = Intrinsic::x86_avx2_psrlv_q;
3706 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3707 IID = Intrinsic::x86_avx2_psrlv_q_256;
3708 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3709 IID = Intrinsic::x86_avx2_psrlv_d;
3710 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3711 IID = Intrinsic::x86_avx2_psrlv_d_256;
3712 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3713 IID = Intrinsic::x86_avx512_psrlv_w_128;
3714 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3715 IID = Intrinsic::x86_avx512_psrlv_w_256;
3716 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3717 IID = Intrinsic::x86_avx512_psrlv_w_512;
3718 else
3719 llvm_unreachable("Unexpected size");
3720 } else if (Name.ends_with(".128")) {
3721 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3722 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3723 : Intrinsic::x86_sse2_psrl_d;
3724 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3725 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3726 : Intrinsic::x86_sse2_psrl_q;
3727 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3728 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3729 : Intrinsic::x86_sse2_psrl_w;
3730 else
3731 llvm_unreachable("Unexpected size");
3732 } else if (Name.ends_with(".256")) {
3733 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3734 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3735 : Intrinsic::x86_avx2_psrl_d;
3736 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3737 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3738 : Intrinsic::x86_avx2_psrl_q;
3739 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3740 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3741 : Intrinsic::x86_avx2_psrl_w;
3742 else
3743 llvm_unreachable("Unexpected size");
3744 } else {
3745 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3746 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3747 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3748 : Intrinsic::x86_avx512_psrl_d_512;
3749 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3750 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3751 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3752 : Intrinsic::x86_avx512_psrl_q_512;
3753 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3754 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3755 : Intrinsic::x86_avx512_psrl_w_512;
3756 else
3757 llvm_unreachable("Unexpected size");
3758 }
3759
3760 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3761 } else if (Name.starts_with("avx512.mask.psra")) {
3762 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3763 bool IsVariable = Name[16] == 'v';
3764 char Size = Name[16] == '.' ? Name[17]
3765 : Name[17] == '.' ? Name[18]
3766 : Name[18] == '.' ? Name[19]
3767 : Name[20];
3768
3769 Intrinsic::ID IID;
3770 if (IsVariable && Name[17] != '.') {
3771 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3772 IID = Intrinsic::x86_avx2_psrav_d;
3773 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3774 IID = Intrinsic::x86_avx2_psrav_d_256;
3775 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3776 IID = Intrinsic::x86_avx512_psrav_w_128;
3777 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3778 IID = Intrinsic::x86_avx512_psrav_w_256;
3779 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3780 IID = Intrinsic::x86_avx512_psrav_w_512;
3781 else
3782 llvm_unreachable("Unexpected size");
3783 } else if (Name.ends_with(".128")) {
3784 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3785 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3786 : Intrinsic::x86_sse2_psra_d;
3787 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3788 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3789 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3790 : Intrinsic::x86_avx512_psra_q_128;
3791 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3792 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3793 : Intrinsic::x86_sse2_psra_w;
3794 else
3795 llvm_unreachable("Unexpected size");
3796 } else if (Name.ends_with(".256")) {
3797 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3798 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3799 : Intrinsic::x86_avx2_psra_d;
3800 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3801 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3802 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3803 : Intrinsic::x86_avx512_psra_q_256;
3804 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3805 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3806 : Intrinsic::x86_avx2_psra_w;
3807 else
3808 llvm_unreachable("Unexpected size");
3809 } else {
3810 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3811 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3812 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3813 : Intrinsic::x86_avx512_psra_d_512;
3814 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3815 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3816 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3817 : Intrinsic::x86_avx512_psra_q_512;
3818 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3819 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3820 : Intrinsic::x86_avx512_psra_w_512;
3821 else
3822 llvm_unreachable("Unexpected size");
3823 }
3824
3825 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3826 } else if (Name.starts_with("avx512.mask.move.s")) {
3827 Rep = upgradeMaskedMove(Builder, *CI);
3828 } else if (Name.starts_with("avx512.cvtmask2")) {
3829 Rep = upgradeMaskToInt(Builder, *CI);
3830 } else if (Name.ends_with(".movntdqa")) {
3831 MDNode *Node = MDNode::get(
3832 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3833
3834 LoadInst *LI = Builder.CreateAlignedLoad(
3835 CI->getType(), CI->getArgOperand(0),
3836 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3837 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3838 Rep = LI;
3839 } else if (Name.starts_with("fma.vfmadd.") ||
3840 Name.starts_with("fma.vfmsub.") ||
3841 Name.starts_with("fma.vfnmadd.") ||
3842 Name.starts_with("fma.vfnmsub.")) {
3843 bool NegMul = Name[6] == 'n';
3844 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3845 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3846
3847 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3848 CI->getArgOperand(2)};
3849
3850 if (IsScalar) {
3851 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3852 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3853 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3854 }
3855
3856 if (NegMul && !IsScalar)
3857 Ops[0] = Builder.CreateFNeg(Ops[0]);
3858 if (NegMul && IsScalar)
3859 Ops[1] = Builder.CreateFNeg(Ops[1]);
3860 if (NegAcc)
3861 Ops[2] = Builder.CreateFNeg(Ops[2]);
3862
3863 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3864
3865 if (IsScalar)
3866 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3867 } else if (Name.starts_with("fma4.vfmadd.s")) {
3868 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3869 CI->getArgOperand(2)};
3870
3871 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3872 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3873 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3874
3875 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3876
3877 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3878 Rep, (uint64_t)0);
3879 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3880 Name.starts_with("avx512.maskz.vfmadd.s") ||
3881 Name.starts_with("avx512.mask3.vfmadd.s") ||
3882 Name.starts_with("avx512.mask3.vfmsub.s") ||
3883 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3884 bool IsMask3 = Name[11] == '3';
3885 bool IsMaskZ = Name[11] == 'z';
3886 // Drop the "avx512.mask." to make it easier.
3887 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3888 bool NegMul = Name[2] == 'n';
3889 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3890
3891 Value *A = CI->getArgOperand(0);
3892 Value *B = CI->getArgOperand(1);
3893 Value *C = CI->getArgOperand(2);
3894
3895 if (NegMul && (IsMask3 || IsMaskZ))
3896 A = Builder.CreateFNeg(A);
3897 if (NegMul && !(IsMask3 || IsMaskZ))
3898 B = Builder.CreateFNeg(B);
3899 if (NegAcc)
3900 C = Builder.CreateFNeg(C);
3901
3902 A = Builder.CreateExtractElement(A, (uint64_t)0);
3903 B = Builder.CreateExtractElement(B, (uint64_t)0);
3904 C = Builder.CreateExtractElement(C, (uint64_t)0);
3905
3906 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3907 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3908 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3909
3910 Intrinsic::ID IID;
3911 if (Name.back() == 'd')
3912 IID = Intrinsic::x86_avx512_vfmadd_f64;
3913 else
3914 IID = Intrinsic::x86_avx512_vfmadd_f32;
3915 Rep = Builder.CreateIntrinsic(IID, Ops);
3916 } else {
3917 Rep = Builder.CreateFMA(A, B, C);
3918 }
3919
3920 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3921 : IsMask3 ? C
3922 : A;
3923
3924 // For Mask3 with NegAcc, we need to create a new extractelement that
3925 // avoids the negation above.
3926 if (NegAcc && IsMask3)
3927 PassThru =
3928 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3929
3930 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3931 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3932 (uint64_t)0);
3933 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3934 Name.starts_with("avx512.mask.vfnmadd.p") ||
3935 Name.starts_with("avx512.mask.vfnmsub.p") ||
3936 Name.starts_with("avx512.mask3.vfmadd.p") ||
3937 Name.starts_with("avx512.mask3.vfmsub.p") ||
3938 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3939 Name.starts_with("avx512.maskz.vfmadd.p")) {
3940 bool IsMask3 = Name[11] == '3';
3941 bool IsMaskZ = Name[11] == 'z';
3942 // Drop the "avx512.mask." to make it easier.
3943 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3944 bool NegMul = Name[2] == 'n';
3945 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3946
3947 Value *A = CI->getArgOperand(0);
3948 Value *B = CI->getArgOperand(1);
3949 Value *C = CI->getArgOperand(2);
3950
3951 if (NegMul && (IsMask3 || IsMaskZ))
3952 A = Builder.CreateFNeg(A);
3953 if (NegMul && !(IsMask3 || IsMaskZ))
3954 B = Builder.CreateFNeg(B);
3955 if (NegAcc)
3956 C = Builder.CreateFNeg(C);
3957
3958 if (CI->arg_size() == 5 &&
3959 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3960 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3961 Intrinsic::ID IID;
3962 // Check the character before ".512" in string.
3963 if (Name[Name.size() - 5] == 's')
3964 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3965 else
3966 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3967
3968 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
3969 } else {
3970 Rep = Builder.CreateFMA(A, B, C);
3971 }
3972
3973 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3974 : IsMask3 ? CI->getArgOperand(2)
3975 : CI->getArgOperand(0);
3976
3977 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3978 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3979 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3980 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3981 Intrinsic::ID IID;
3982 if (VecWidth == 128 && EltWidth == 32)
3983 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3984 else if (VecWidth == 256 && EltWidth == 32)
3985 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3986 else if (VecWidth == 128 && EltWidth == 64)
3987 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3988 else if (VecWidth == 256 && EltWidth == 64)
3989 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3990 else
3991 llvm_unreachable("Unexpected intrinsic");
3992
3993 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3994 CI->getArgOperand(2)};
3995 Ops[2] = Builder.CreateFNeg(Ops[2]);
3996 Rep = Builder.CreateIntrinsic(IID, Ops);
3997 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3998 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3999 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4000 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4001 bool IsMask3 = Name[11] == '3';
4002 bool IsMaskZ = Name[11] == 'z';
4003 // Drop the "avx512.mask." to make it easier.
4004 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4005 bool IsSubAdd = Name[3] == 's';
4006 if (CI->arg_size() == 5) {
4007 Intrinsic::ID IID;
4008 // Check the character before ".512" in string.
4009 if (Name[Name.size() - 5] == 's')
4010 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4011 else
4012 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4013
4014 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4015 CI->getArgOperand(2), CI->getArgOperand(4)};
4016 if (IsSubAdd)
4017 Ops[2] = Builder.CreateFNeg(Ops[2]);
4018
4019 Rep = Builder.CreateIntrinsic(IID, Ops);
4020 } else {
4021 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4022
4023 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4024 CI->getArgOperand(2)};
4025
4026 Function *FMA = Intrinsic::getOrInsertDeclaration(
4027 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4028 Value *Odd = Builder.CreateCall(FMA, Ops);
4029 Ops[2] = Builder.CreateFNeg(Ops[2]);
4030 Value *Even = Builder.CreateCall(FMA, Ops);
4031
4032 if (IsSubAdd)
4033 std::swap(Even, Odd);
4034
4035 SmallVector<int, 32> Idxs(NumElts);
4036 for (int i = 0; i != NumElts; ++i)
4037 Idxs[i] = i + (i % 2) * NumElts;
4038
4039 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4040 }
4041
4042 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4043 : IsMask3 ? CI->getArgOperand(2)
4044 : CI->getArgOperand(0);
4045
4046 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4047 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4048 Name.starts_with("avx512.maskz.pternlog.")) {
4049 bool ZeroMask = Name[11] == 'z';
4050 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4051 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4052 Intrinsic::ID IID;
4053 if (VecWidth == 128 && EltWidth == 32)
4054 IID = Intrinsic::x86_avx512_pternlog_d_128;
4055 else if (VecWidth == 256 && EltWidth == 32)
4056 IID = Intrinsic::x86_avx512_pternlog_d_256;
4057 else if (VecWidth == 512 && EltWidth == 32)
4058 IID = Intrinsic::x86_avx512_pternlog_d_512;
4059 else if (VecWidth == 128 && EltWidth == 64)
4060 IID = Intrinsic::x86_avx512_pternlog_q_128;
4061 else if (VecWidth == 256 && EltWidth == 64)
4062 IID = Intrinsic::x86_avx512_pternlog_q_256;
4063 else if (VecWidth == 512 && EltWidth == 64)
4064 IID = Intrinsic::x86_avx512_pternlog_q_512;
4065 else
4066 llvm_unreachable("Unexpected intrinsic");
4067
4068 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4069 CI->getArgOperand(2), CI->getArgOperand(3)};
4070 Rep = Builder.CreateIntrinsic(IID, Args);
4071 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4072 : CI->getArgOperand(0);
4073 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4074 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4075 Name.starts_with("avx512.maskz.vpmadd52")) {
4076 bool ZeroMask = Name[11] == 'z';
4077 bool High = Name[20] == 'h' || Name[21] == 'h';
4078 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4079 Intrinsic::ID IID;
4080 if (VecWidth == 128 && !High)
4081 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4082 else if (VecWidth == 256 && !High)
4083 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4084 else if (VecWidth == 512 && !High)
4085 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4086 else if (VecWidth == 128 && High)
4087 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4088 else if (VecWidth == 256 && High)
4089 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4090 else if (VecWidth == 512 && High)
4091 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4092 else
4093 llvm_unreachable("Unexpected intrinsic");
4094
4095 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4096 CI->getArgOperand(2)};
4097 Rep = Builder.CreateIntrinsic(IID, Args);
4098 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4099 : CI->getArgOperand(0);
4100 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4101 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4102 Name.starts_with("avx512.mask.vpermt2var.") ||
4103 Name.starts_with("avx512.maskz.vpermt2var.")) {
4104 bool ZeroMask = Name[11] == 'z';
4105 bool IndexForm = Name[17] == 'i';
4106 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4107 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4108 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4109 Name.starts_with("avx512.mask.vpdpbusds.") ||
4110 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4111 bool ZeroMask = Name[11] == 'z';
4112 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4113 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4114 Intrinsic::ID IID;
4115 if (VecWidth == 128 && !IsSaturating)
4116 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4117 else if (VecWidth == 256 && !IsSaturating)
4118 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4119 else if (VecWidth == 512 && !IsSaturating)
4120 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4121 else if (VecWidth == 128 && IsSaturating)
4122 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4123 else if (VecWidth == 256 && IsSaturating)
4124 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4125 else if (VecWidth == 512 && IsSaturating)
4126 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4127 else
4128 llvm_unreachable("Unexpected intrinsic");
4129
4130 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4131 CI->getArgOperand(2)};
4132 Rep = Builder.CreateIntrinsic(IID, Args);
4133 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4134 : CI->getArgOperand(0);
4135 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4136 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4137 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4138 Name.starts_with("avx512.mask.vpdpwssds.") ||
4139 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4140 bool ZeroMask = Name[11] == 'z';
4141 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4142 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4143 Intrinsic::ID IID;
4144 if (VecWidth == 128 && !IsSaturating)
4145 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4146 else if (VecWidth == 256 && !IsSaturating)
4147 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4148 else if (VecWidth == 512 && !IsSaturating)
4149 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4150 else if (VecWidth == 128 && IsSaturating)
4151 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4152 else if (VecWidth == 256 && IsSaturating)
4153 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4154 else if (VecWidth == 512 && IsSaturating)
4155 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4156 else
4157 llvm_unreachable("Unexpected intrinsic");
4158
4159 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4160 CI->getArgOperand(2)};
4161 Rep = Builder.CreateIntrinsic(IID, Args);
4162 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4163 : CI->getArgOperand(0);
4164 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4165 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4166 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4167 Name == "subborrow.u32" || Name == "subborrow.u64") {
4168 Intrinsic::ID IID;
4169 if (Name[0] == 'a' && Name.back() == '2')
4170 IID = Intrinsic::x86_addcarry_32;
4171 else if (Name[0] == 'a' && Name.back() == '4')
4172 IID = Intrinsic::x86_addcarry_64;
4173 else if (Name[0] == 's' && Name.back() == '2')
4174 IID = Intrinsic::x86_subborrow_32;
4175 else if (Name[0] == 's' && Name.back() == '4')
4176 IID = Intrinsic::x86_subborrow_64;
4177 else
4178 llvm_unreachable("Unexpected intrinsic");
4179
4180 // Make a call with 3 operands.
4181 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4182 CI->getArgOperand(2)};
4183 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4184
4185 // Extract the second result and store it.
4186 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4187 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4188 // Replace the original call result with the first result of the new call.
4189 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4190
4191 CI->replaceAllUsesWith(CF);
4192 Rep = nullptr;
4193 } else if (Name.starts_with("avx512.mask.") &&
4194 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4195 // Rep will be updated by the call in the condition.
4196 }
4197
4198 return Rep;
4199 }
4200
upgradeAArch64IntrinsicCall(StringRef Name,CallBase * CI,Function * F,IRBuilder<> & Builder)4201 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4202 Function *F, IRBuilder<> &Builder) {
4203 if (Name.starts_with("neon.bfcvt")) {
4204 if (Name.starts_with("neon.bfcvtn2")) {
4205 SmallVector<int, 32> LoMask(4);
4206 std::iota(LoMask.begin(), LoMask.end(), 0);
4207 SmallVector<int, 32> ConcatMask(8);
4208 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4209 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4210 Value *Trunc =
4211 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4212 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4213 } else if (Name.starts_with("neon.bfcvtn")) {
4214 SmallVector<int, 32> ConcatMask(8);
4215 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4216 Type *V4BF16 =
4217 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4218 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4219 dbgs() << "Trunc: " << *Trunc << "\n";
4220 return Builder.CreateShuffleVector(
4221 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4222 } else {
4223 return Builder.CreateFPTrunc(CI->getOperand(0),
4224 Type::getBFloatTy(F->getContext()));
4225 }
4226 } else if (Name.starts_with("sve.fcvt")) {
4227 Intrinsic::ID NewID =
4228 StringSwitch<Intrinsic::ID>(Name)
4229 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4230 .Case("sve.fcvtnt.bf16f32",
4231 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4232 .Default(Intrinsic::not_intrinsic);
4233 if (NewID == Intrinsic::not_intrinsic)
4234 llvm_unreachable("Unhandled Intrinsic!");
4235
4236 SmallVector<Value *, 3> Args(CI->args());
4237
4238 // The original intrinsics incorrectly used a predicate based on the
4239 // smallest element type rather than the largest.
4240 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4241 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4242
4243 if (Args[1]->getType() != BadPredTy)
4244 llvm_unreachable("Unexpected predicate type!");
4245
4246 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4247 BadPredTy, Args[1]);
4248 Args[1] = Builder.CreateIntrinsic(
4249 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4250
4251 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4252 CI->getName());
4253 }
4254
4255 llvm_unreachable("Unhandled Intrinsic!");
4256 }
4257
// Upgrade old-style ARM MVE/CDE intrinsics whose 64-bit-element overloads
// were defined with a v4i1 predicate type to the current definitions, which
// use a v2i1 predicate. Returns the replacement value.
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // Re-emit the same intrinsic ID, but with v2i1 in place of the old v4i1
    // predicate type in the overload type list.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    // Build the overload type list expected by the current definition of each
    // intrinsic; only the predicate slot changes.
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Convert every predicate (i1-vector) operand: cast the old v4i1 value to
    // an integer mask and back to v2i1; other operands pass through unchanged.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4342
4343 // These are expected to have the arguments:
4344 // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4345 //
4346 // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4347 //
// Upgrade legacy AMDGCN atomic intrinsics to native atomicrmw instructions.
// Returns the replacement value, or nullptr for malformed bitcode (leaving
// the call untouched for the caller to deal with).
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
                                         Function *F, IRBuilder<> &Builder) {
  // Map the intrinsic name prefix to the equivalent atomicrmw operation.
  // No Default case: the caller is expected to only route matching names here.
  AtomicRMWInst::BinOp RMWOp =
      StringSwitch<AtomicRMWInst::BinOp>(Name)
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
          .StartsWith("ds.fmin", AtomicRMWInst::FMin)
          .StartsWith("ds.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
          .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
          .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
          .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);

  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    // Treat a non-constant volatile argument conservatively as volatile.
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  // Translate the ordering argument, falling back to seq_cst for missing,
  // invalid, or non-atomic/unordered values (atomicrmw requires at least
  // monotonic).
  AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
  if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::SequentiallyConsistent;

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    // The legacy intrinsics did not support fine-grained memory; record that
    // assumption as metadata so codegen can still select the instruction.
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    // Flat atomics cannot target private memory; express that as a
    // noalias.addrspace range excluding exactly the private address space.
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
        MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
                        APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  // Cast back in case Val was bitcast to <2 x bfloat> above; otherwise this
  // is a no-op.
  return Builder.CreateBitCast(RMW, RetTy);
}
4437
4438 /// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4439 /// plain MDNode, as it's the verifier's job to check these are the correct
4440 /// types later.
unwrapMAVOp(CallBase * CI,unsigned Op)4441 static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4442 if (Op < CI->arg_size()) {
4443 if (MetadataAsValue *MAV =
4444 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4445 Metadata *MD = MAV->getMetadata();
4446 return dyn_cast_if_present<MDNode>(MD);
4447 }
4448 }
4449 return nullptr;
4450 }
4451
4452 /// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
unwrapMAVMetadataOp(CallBase * CI,unsigned Op)4453 static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4454 if (Op < CI->arg_size())
4455 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4456 return MAV->getMetadata();
4457 return nullptr;
4458 }
4459
getDebugLocSafe(const Instruction * I)4460 static MDNode *getDebugLocSafe(const Instruction *I) {
4461 // The MDNode attached to this instruction might not be the correct type,
4462 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4463 return I->getDebugLoc().getAsMDNode();
4464 }
4465
4466 /// Convert debug intrinsic calls to non-instruction debug records.
4467 /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4468 /// \p CI - The debug intrinsic call.
upgradeDbgIntrinsicToDbgRecord(StringRef Name,CallBase * CI)4469 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4470 DbgRecord *DR = nullptr;
4471 if (Name == "label") {
4472 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4473 CI->getDebugLoc());
4474 } else if (Name == "assign") {
4475 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4476 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4477 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4478 unwrapMAVMetadataOp(CI, 4),
4479 /*The address is a Value ref, it will be stored as a Metadata */
4480 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4481 } else if (Name == "declare") {
4482 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4483 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4484 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4485 getDebugLocSafe(CI));
4486 } else if (Name == "addr") {
4487 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4488 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4489 // Don't try to add something to the expression if it's not an expression.
4490 // Instead, allow the verifier to fail later.
4491 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4492 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4493 }
4494 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4495 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4496 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4497 getDebugLocSafe(CI));
4498 } else if (Name == "value") {
4499 // An old version of dbg.value had an extra offset argument.
4500 unsigned VarOp = 1;
4501 unsigned ExprOp = 2;
4502 if (CI->arg_size() == 4) {
4503 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4504 // Nonzero offset dbg.values get dropped without a replacement.
4505 if (!Offset || !Offset->isZeroValue())
4506 return;
4507 VarOp = 2;
4508 ExprOp = 3;
4509 }
4510 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4511 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4512 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4513 nullptr, getDebugLocSafe(CI));
4514 }
4515 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4516 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4517 }
4518
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
///
/// \param CI    The call to upgrade; it is deleted (or rewritten in place)
///              before this function returns.
/// \param NewFn The replacement declaration produced by
///              UpgradeIntrinsicFunction, or null if the intrinsic was
///              removed entirely and must be lowered to plain IR.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
  // checks the callee's function type matches. It's likely we need to handle
  // type changes here.
  Function *F = dyn_cast<Function>(CI->getCalledOperand());
  if (!F)
    return;

  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  if (!NewFn) {
    // A null NewFn means the intrinsic has no direct replacement declaration:
    // dispatch on the name prefix to a per-target helper that emits
    // replacement IR (Rep), then delete the original call.
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    // consume_front both tests the prefix and strips it from Name, so the
    // helpers below see the target-relative intrinsic name.
    bool IsX86 = Name.consume_front("x86.");
    bool IsNVVM = Name.consume_front("nvvm.");
    bool IsAArch64 = Name.consume_front("aarch64.");
    bool IsARM = Name.consume_front("arm.");
    bool IsAMDGCN = Name.consume_front("amdgcn.");
    bool IsDbg = Name.consume_front("dbg.");
    Value *Rep = nullptr;

    if (!IsX86 && Name == "stackprotectorcheck") {
      // Dropped with no replacement value.
      Rep = nullptr;
    } else if (IsNVVM) {
      Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsX86) {
      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
    } else if (IsAArch64) {
      Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
    } else if (IsARM) {
      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else if (IsDbg) {
      // Debug intrinsics become DbgRecords attached to instructions rather
      // than calls; they produce no replacement value.
      upgradeDbgIntrinsicToDbgRecord(Name, CI);
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  // Shared fallback for cases that need no argument rewriting: handles a pure
  // rename, an upgrade from a named to a literal struct return type, or (as a
  // last resort) a callee pointer-cast that the verifier will flag.
  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      // Re-wrap the literal-struct result element-by-element so existing
      // users of the named struct keep working.
      SmallVector<Value *> Args(CI->args());
      CallInst *NewCI = Builder.CreateCall(NewFn, Args);
      NewCI->setAttributes(CI->getAttributes());
      Value *Res = PoisonValue::get(OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      CI->replaceAllUsesWith(Res);
      CI->eraseFromParent();
      return;
    }

    // We're probably about to produce something invalid. Let the verifier catch
    // it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
    return;
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // Arguments are unchanged; only the declaration differs.
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // Rebuild the lane immediate (operand 3) as an i32 constant.
    LLVMContext &Ctx = F->getParent()->getContext();
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    // The old form returned one wide scalable vector; the new form returns a
    // struct of N sub-vectors. Re-assemble the wide vector via insertvector.
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }

  case Intrinsic::coro_end: {
    // The new coro.end takes an extra trailing token operand; pass 'none'.
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    // Only aarch64.sve.tuple.get was remapped to vector.extract; its element
    // index must be scaled into a vector-element offset.
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    // aarch64.sve.tuple.set/createN were remapped to vector.insert.
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      // Scale the tuple index into a vector-element offset.
      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      // Concatenate the N operands into one wide scalable vector.
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }

  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    // Operands 1 and 2 changed from an integer vector to a bfloat vector of
    // the same total width; bitcast them across.
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // The new forms take an extra is_zero_poison flag; old behavior maps to
    // 'false' (zero input is defined).
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    // Older forms lacked the nullunknown and/or dynamic flags; default any
    // missing trailing flag to 'false'.
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
    if (Name.starts_with("dbg.addr")) {
      DIExpression *Expr = cast<DIExpression>(
          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
      NewCall =
          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     MetadataAsValue::get(C, Expr)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    Value *Arg2 = CI->getArgOperand(2);
    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
      return;

    // i64 overloads are narrowed to the i32 form and the result sign-extended
    // back to the caller's type.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
                            cast<ConstantInt>(Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_mapa_shared_cluster: {
    // Create a new call with the correct address space.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
    Value *Res = NewCall;
    // Cast the result back to the old shared address space for existing users.
    Res = Builder.CreateAddrSpaceCast(
        Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
    // Create a new call with the correct address space.
    SmallVector<Value *, 4> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
    SmallVector<Value *, 16> Args(CI->args());

    // Create AddrSpaceCast to shared_cluster if needed.
    // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
    unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
    if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
      Args[0] = Builder.CreateAddrSpaceCast(
          Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    // Attach the flag argument for cta_group, with a
    // default value of 0. This handles case (2) in
    // shouldUpgradeNVPTXTMAG2SIntrinsics().
    size_t NumArgs = CI->arg_size();
    Value *FlagArg = CI->getArgOperand(NumArgs - 3);
    if (!FlagArg->getType()->isIntegerTy(1))
      Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));

    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    if (!CI->getType()->isIntegerTy(64))
      return;

    // Narrow i64 input to i32, call, and sign-extend the result back.
    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The old form took two operands; only the second is used now.
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    // The selector operand (2) changed from a float vector to the matching
    // integer vector type; bitcast it across.
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take 1 arguments. If we have no arguments, it is already
    // upgraded.
    // NOTE(review): CallBase::getNumOperands() includes the callee operand and
    // so is never 0 here — presumably arg_size() was intended; confirm.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->args());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    // The integer mask operand becomes an i1 vector; the i1-vector result is
    // packed back into an integer mask for existing users.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // These changed from i16 vectors to bfloat vectors; bitcast operands in
    // and the result back out.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
    // Operands 1 and 2 changed from i32 vectors to bfloat vectors with twice
    // the element count.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}
5119
UpgradeCallsToIntrinsic(Function * F)5120 void llvm::UpgradeCallsToIntrinsic(Function *F) {
5121 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5122
5123 // Check if this function should be upgraded and get the replacement function
5124 // if there is one.
5125 Function *NewFn;
5126 if (UpgradeIntrinsicFunction(F, NewFn)) {
5127 // Replace all users of the old function with the new function or new
5128 // instructions. This is not a range loop because the call is deleted.
5129 for (User *U : make_early_inc_range(F->users()))
5130 if (CallBase *CB = dyn_cast<CallBase>(U))
5131 UpgradeIntrinsicCall(CB, NewFn);
5132
5133 // Remove old function, no longer used, from the module.
5134 F->eraseFromParent();
5135 }
5136 }
5137
UpgradeTBAANode(MDNode & MD)5138 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5139 const unsigned NumOperands = MD.getNumOperands();
5140 if (NumOperands == 0)
5141 return &MD; // Invalid, punt to a verifier error.
5142
5143 // Check if the tag uses struct-path aware TBAA format.
5144 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5145 return &MD;
5146
5147 auto &Context = MD.getContext();
5148 if (NumOperands == 3) {
5149 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5150 MDNode *ScalarType = MDNode::get(Context, Elts);
5151 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5152 Metadata *Elts2[] = {ScalarType, ScalarType,
5153 ConstantAsMetadata::get(
5154 Constant::getNullValue(Type::getInt64Ty(Context))),
5155 MD.getOperand(2)};
5156 return MDNode::get(Context, Elts2);
5157 }
5158 // Create a MDNode <MD, MD, offset 0>
5159 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5160 Type::getInt64Ty(Context)))};
5161 return MDNode::get(Context, Elts);
5162 }
5163
UpgradeBitCastInst(unsigned Opc,Value * V,Type * DestTy,Instruction * & Temp)5164 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5165 Instruction *&Temp) {
5166 if (Opc != Instruction::BitCast)
5167 return nullptr;
5168
5169 Temp = nullptr;
5170 Type *SrcTy = V->getType();
5171 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5172 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5173 LLVMContext &Context = V->getContext();
5174
5175 // We have no information about target data layout, so we assume that
5176 // the maximum pointer size is 64bit.
5177 Type *MidTy = Type::getInt64Ty(Context);
5178 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5179
5180 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5181 }
5182
5183 return nullptr;
5184 }
5185
UpgradeBitCastExpr(unsigned Opc,Constant * C,Type * DestTy)5186 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5187 if (Opc != Instruction::BitCast)
5188 return nullptr;
5189
5190 Type *SrcTy = C->getType();
5191 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5192 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5193 LLVMContext &Context = C->getContext();
5194
5195 // We have no information about target data layout, so we assume that
5196 // the maximum pointer size is 64bit.
5197 Type *MidTy = Type::getInt64Ty(Context);
5198
5199 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5200 DestTy);
5201 }
5202
5203 return nullptr;
5204 }
5205
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  // We need to get metadata before the module is verified (i.e., getModuleFlag
  // makes assumptions that we haven't verified yet). Carefully extract the flag
  // from the metadata.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    // Scan module flags for the "Debug Info Version" entry; each flag node is
    // !{behavior, !"key", value}.
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
        Version = CI->getZExtValue();
    }
  }

  // Current version: run the verifier. A broken module is fatal; broken debug
  // info only is diagnosed and then stripped below.
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  // Out-of-date (or broken current-version) debug info is dropped entirely.
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
5252
/// Merge one dimension of an nvvm.annotations vector property (e.g. maxntid)
/// into the corresponding "x[,y[,z]]" string-valued function attribute,
/// preserving any dimensions already recorded on the function.
///
/// \param Attr The target attribute name (e.g. "nvvm.maxntid").
/// \param DimC The dimension character: 'x', 'y', or 'z'.
/// \param GV   The annotated global; must be a Function.
/// \param V    The metadata value, a ConstantInt holding the dimension size.
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
                                    GlobalValue *GV, const Metadata *V) {
  Function *F = cast<Function>(GV);

  // Unspecified dimensions default to 1.
  constexpr StringLiteral DefaultValue = "1";
  StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
  unsigned Length = 0;

  if (F->hasFnAttribute(Attr)) {
    // We expect the existing attribute to have the form "x[,y[,z]]". Here we
    // parse these elements placing them into Vect3
    StringRef S = F->getFnAttribute(Attr).getValueAsString();
    for (; Length < 3 && !S.empty(); Length++) {
      auto [Part, Rest] = S.split(',');
      Vect3[Length] = Part.trim();
      S = Rest;
    }
  }

  const unsigned Dim = DimC - 'x';
  assert(Dim < 3 && "Unexpected dim char");

  const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();

  // local variable required for StringRef in Vect3 to point to.
  // (Vect3 holds non-owning views; VStr must outlive the join() below.)
  const std::string VStr = llvm::utostr(VInt);
  Vect3[Dim] = VStr;
  // Emit at least enough components to cover the dimension being set.
  Length = std::max(Length, Dim + 1);

  const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
  F->addFnAttr(Attr, NewAttr);
}
5285
isXYZ(StringRef S)5286 static inline bool isXYZ(StringRef S) {
5287 return S == "x" || S == "y" || S == "z";
5288 }
5289
/// Upgrade a single key/value pair from an nvvm.annotations entry into the
/// equivalent attribute or calling convention on \p GV. Returns true if the
/// pair was consumed (and may therefore be dropped from the metadata).
bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                        const Metadata *V) {
  if (K == "kernel") {
    // A non-zero "kernel" annotation maps to the PTX kernel calling conv.
    if (!mdconst::extract<ConstantInt>(V)->isZero())
      cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in low 16-bits, The index is specified in the high bits. For
    // the index, 0 indicates the return value while higher values correspond
    // to each parameter (idx = param + 1).
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    cast<Function>(GV)->addAttributeAtIndex(
        Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
    return true;
  }
  // Scalar properties become string-valued "nvvm.*" function attributes.
  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
    return true;
  }
  if (K == "minctasm") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
    return true;
  }
  if (K == "maxnreg") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
    return true;
  }
  // Per-dimension properties ("maxntidx", "reqntidy", "cluster_dim_z", ...)
  // are merged into a single comma-separated vector attribute.
  if (K.consume_front("maxntid") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
    return true;
  }
  if (K.consume_front("reqntid") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
    return true;
  }
  if (K.consume_front("cluster_dim_") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
    return true;
  }

  // Unrecognized key: caller keeps the pair in the metadata.
  return false;
}
5340
UpgradeNVVMAnnotations(Module & M)5341 void llvm::UpgradeNVVMAnnotations(Module &M) {
5342 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5343 if (!NamedMD)
5344 return;
5345
5346 SmallVector<MDNode *, 8> NewNodes;
5347 SmallSet<const MDNode *, 8> SeenNodes;
5348 for (MDNode *MD : NamedMD->operands()) {
5349 if (!SeenNodes.insert(MD).second)
5350 continue;
5351
5352 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5353 if (!GV)
5354 continue;
5355
5356 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5357
5358 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5359 // Each nvvm.annotations metadata entry will be of the following form:
5360 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5361 // start index = 1, to skip the global variable key
5362 // increment = 2, to skip the value for each property-value pairs
5363 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5364 MDString *K = cast<MDString>(MD->getOperand(j));
5365 const MDOperand &V = MD->getOperand(j + 1);
5366 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5367 if (!Upgraded)
5368 NewOperands.append({K, V});
5369 }
5370
5371 if (NewOperands.size() > 1)
5372 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5373 }
5374
5375 NamedMD->clearOperands();
5376 for (MDNode *N : NewNodes)
5377 NamedMD->addOperand(N);
5378 }
5379
5380 /// This checks for objc retain release marker which should be upgraded. It
5381 /// returns true if module is modified.
upgradeRetainReleaseMarker(Module & M)5382 static bool upgradeRetainReleaseMarker(Module &M) {
5383 bool Changed = false;
5384 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5385 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5386 if (ModRetainReleaseMarker) {
5387 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5388 if (Op) {
5389 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5390 if (ID) {
5391 SmallVector<StringRef, 4> ValueComp;
5392 ID->getString().split(ValueComp, "#");
5393 if (ValueComp.size() == 2) {
5394 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5395 ID = MDString::get(M.getContext(), NewValue);
5396 }
5397 M.addModuleFlag(Module::Error, MarkerKey, ID);
5398 M.eraseNamedMetadata(ModRetainReleaseMarker);
5399 Changed = true;
5400 }
5401 }
5402 }
5403 return Changed;
5404 }
5405
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);

    // make_early_inc_range is required: the loop body erases the visited
    // call from the user list.
    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      // Leave the original call untouched if any argument cast was invalid.
      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the now-dead declaration once every rewritable call is gone.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  // Table of ARC runtime entry points and the intrinsics that replace them.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
5527
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  // Only meaningful once HasSwiftVersionFlag has been set below.
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    // A module flag is always a (behavior, key, value) triple.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    // Replace the current flag's behavior field in place, keeping key/value.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
                              Type::getInt32Ty(M.getContext()), B)),
                          MDString::get(M.getContext(), ID->getString()),
                          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with("sign-return-address")) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Removed the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
    // If the higher bits are set, it adds new module flag for swift info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        // Already in the new i8 form; nothing to do for this flag.
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        // Bits 8-31 encode the Swift ABI/major/minor versions; extract them
        // and emit dedicated module flags for them after the loop.
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        // Rewrite the flag with the low byte only, as an i8.
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }

    // Rename the AMDGPU code-object-version flag to its current spelling.
    if (ID->getString() == "amdgpu_code_object_version") {
      Metadata *Ops[3] = {
          Op->getOperand(0),
          MDString::get(M.getContext(), "amdhsa_code_object_version"),
          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
5670
UpgradeSectionAttributes(Module & M)5671 void llvm::UpgradeSectionAttributes(Module &M) {
5672 auto TrimSpaces = [](StringRef Section) -> std::string {
5673 SmallVector<StringRef, 5> Components;
5674 Section.split(Components, ',');
5675
5676 SmallString<32> Buffer;
5677 raw_svector_ostream OS(Buffer);
5678
5679 for (auto Component : Components)
5680 OS << ',' << Component.trim();
5681
5682 return std::string(OS.str().substr(1));
5683 };
5684
5685 for (auto &GV : M.globals()) {
5686 if (!GV.hasSection())
5687 continue;
5688
5689 StringRef Section = GV.getSection();
5690
5691 if (!Section.starts_with("__DATA, __objc_catlist"))
5692 continue;
5693
5694 // __DATA, __objc_catlist, regular, no_dead_strip
5695 // __DATA,__objc_catlist,regular,no_dead_strip
5696 GV.setSection(TrimSpaces(Section));
5697 }
5698 }
5699
5700 namespace {
5701 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5702 // callsites within a function that did not also have the strictfp attribute.
5703 // Since 10.0, if strict FP semantics are needed within a function, the
5704 // function must have the strictfp attribute and all calls within the function
5705 // must also have the strictfp attribute. This latter restriction is
5706 // necessary to prevent unwanted libcall simplification when a function is
5707 // being cloned (such as for inlining).
5708 //
5709 // The "dangling" strictfp attribute usage was only used to prevent constant
5710 // folding and other libcall simplification. The nobuiltin attribute on the
5711 // callsite has the same effect.
5712 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5713 StrictFPUpgradeVisitor() = default;
5714
visitCallBase__anonaa1e81db0611::StrictFPUpgradeVisitor5715 void visitCallBase(CallBase &Call) {
5716 if (!Call.isStrictFP())
5717 return;
5718 if (isa<ConstrainedFPIntrinsic>(&Call))
5719 return;
5720 // If we get here, the caller doesn't have the strictfp attribute
5721 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5722 Call.removeFnAttr(Attribute::StrictFP);
5723 Call.addFnAttr(Attribute::NoBuiltin);
5724 }
5725 };
5726
5727 /// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5728 struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5729 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5730 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5731
visitAtomicRMWInst__anonaa1e81db0611::AMDGPUUnsafeFPAtomicsUpgradeVisitor5732 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5733 if (!RMW.isFloatingPointOperation())
5734 return;
5735
5736 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5737 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5738 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5739 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5740 }
5741 };
5742 } // namespace
5743
UpgradeFunctionAttributes(Function & F)5744 void llvm::UpgradeFunctionAttributes(Function &F) {
5745 // If a function definition doesn't have the strictfp attribute,
5746 // convert any callsite strictfp attributes to nobuiltin.
5747 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5748 StrictFPUpgradeVisitor SFPV;
5749 SFPV.visit(F);
5750 }
5751
5752 // Remove all incompatibile attributes from function.
5753 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5754 F.getReturnType(), F.getAttributes().getRetAttrs()));
5755 for (auto &Arg : F.args())
5756 Arg.removeAttrs(
5757 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5758
5759 // Older versions of LLVM treated an "implicit-section-name" attribute
5760 // similarly to directly setting the section on a Function.
5761 if (Attribute A = F.getFnAttribute("implicit-section-name");
5762 A.isValid() && A.isStringAttribute()) {
5763 F.setSection(A.getValueAsString());
5764 F.removeFnAttr("implicit-section-name");
5765 }
5766
5767 if (!F.empty()) {
5768 // For some reason this is called twice, and the first time is before any
5769 // instructions are loaded into the body.
5770
5771 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5772 A.isValid()) {
5773
5774 if (A.getValueAsBool()) {
5775 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5776 Visitor.visit(F);
5777 }
5778
5779 // We will leave behind dead attribute uses on external declarations, but
5780 // clang never added these to declarations anyway.
5781 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5782 }
5783 }
5784 }
5785
isOldLoopArgument(Metadata * MD)5786 static bool isOldLoopArgument(Metadata *MD) {
5787 auto *T = dyn_cast_or_null<MDTuple>(MD);
5788 if (!T)
5789 return false;
5790 if (T->getNumOperands() < 1)
5791 return false;
5792 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5793 if (!S)
5794 return false;
5795 return S->getString().starts_with("llvm.vectorizer.");
5796 }
5797
upgradeLoopTag(LLVMContext & C,StringRef OldTag)5798 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5799 StringRef OldPrefix = "llvm.vectorizer.";
5800 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5801
5802 if (OldTag == "llvm.vectorizer.unroll")
5803 return MDString::get(C, "llvm.loop.interleave.count");
5804
5805 return MDString::get(
5806 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5807 .str());
5808 }
5809
upgradeLoopArgument(Metadata * MD)5810 static Metadata *upgradeLoopArgument(Metadata *MD) {
5811 auto *T = dyn_cast_or_null<MDTuple>(MD);
5812 if (!T)
5813 return MD;
5814 if (T->getNumOperands() < 1)
5815 return MD;
5816 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5817 if (!OldTag)
5818 return MD;
5819 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5820 return MD;
5821
5822 // This has an old tag. Upgrade it.
5823 SmallVector<Metadata *, 8> Ops;
5824 Ops.reserve(T->getNumOperands());
5825 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5826 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5827 Ops.push_back(T->getOperand(I));
5828
5829 return MDTuple::get(T->getContext(), Ops);
5830 }
5831
upgradeInstructionLoopAttachment(MDNode & N)5832 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5833 auto *T = dyn_cast<MDTuple>(&N);
5834 if (!T)
5835 return &N;
5836
5837 if (none_of(T->operands(), isOldLoopArgument))
5838 return &N;
5839
5840 SmallVector<Metadata *, 8> Ops;
5841 Ops.reserve(T->getNumOperands());
5842 for (Metadata *MD : T->operands())
5843 Ops.push_back(upgradeLoopArgument(MD));
5844
5845 return MDTuple::get(T->getContext(), Ops);
5846 }
5847
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
  // the address space of globals to 1. This does not apply to SPIRV Logical.
  if (((T.isAMDGPU() && !T.isAMDGCN()) ||
       (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
      !DL.contains("-G") && !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  // Note: from here on, all checks are performed against the original DL
  // string while edits accumulate in Res, so successive upgrades never
  // observe each other's additions.
  std::string Res = DL.str();
  // AMDGCN data layout upgrades.
  if (T.isAMDGCN()) {
    // Define address spaces for constants.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // Add missing non-integral declarations.
    // This goes before adding new address spaces to prevent incoherent string
    // values.
    if (!DL.contains("-ni") && !DL.starts_with("ni"))
      Res.append("-ni:7:8:9");
    // Update ni:7 to ni:7:8:9.
    if (DL.ends_with("ni:7"))
      Res.append(":8:9");
    if (DL.ends_with("ni:7:8"))
      Res.append(":9");

    // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
    // resources) An empty data layout has already been upgraded to G1 by now.
    if (!DL.contains("-p7") && !DL.starts_with("p7"))
      Res.append("-p7:160:256:256:32");
    if (!DL.contains("-p8") && !DL.starts_with("p8"))
      Res.append("-p8:128:128:128:48");
    constexpr StringRef OldP8("-p8:128:128-");
    if (DL.contains(OldP8))
      Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
    if (!DL.contains("-p9") && !DL.starts_with("p9"))
      Res.append("-p9:192:256:256:32");

    return Res;
  }

  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add pointer size address
    // spaces to the datalayout.
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(AddrSpaces)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32"
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // Mips64 with o32 ABI did not add "-i128:128".
    // Add "-i128:128" immediately after the existing "-i64:64" component.
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(I128)) {
      size_t Pos = Res.find(I64);
      if (Pos != size_t(-1))
        Res.insert(Pos + I64.size(), I128);
    }
    return Res;
  }

  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
5967
UpgradeAttributes(AttrBuilder & B)5968 void llvm::UpgradeAttributes(AttrBuilder &B) {
5969 StringRef FramePointer;
5970 Attribute A = B.getAttribute("no-frame-pointer-elim");
5971 if (A.isValid()) {
5972 // The value can be "true" or "false".
5973 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5974 B.removeAttribute("no-frame-pointer-elim");
5975 }
5976 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5977 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5978 if (FramePointer != "all")
5979 FramePointer = "non-leaf";
5980 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5981 }
5982 if (!FramePointer.empty())
5983 B.addAttribute("frame-pointer", FramePointer);
5984
5985 A = B.getAttribute("null-pointer-is-valid");
5986 if (A.isValid()) {
5987 // The value can be "true" or "false".
5988 bool NullPointerIsValid = A.getValueAsString() == "true";
5989 B.removeAttribute("null-pointer-is-valid");
5990 if (NullPointerIsValid)
5991 B.addAttribute(Attribute::NullPointerIsValid);
5992 }
5993 }
5994
UpgradeOperandBundles(std::vector<OperandBundleDef> & Bundles)5995 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5996 // clang.arc.attachedcall bundles are now required to have an operand.
5997 // If they don't, it's okay to drop them entirely: when there is an operand,
5998 // the "attachedcall" is meaningful and required, but without an operand,
5999 // it's just a marker NOP. Dropping it merely prevents an optimization.
6000 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6001 return OBD.getTag() == "clang.arc.attachedcall" &&
6002 OBD.inputs().empty();
6003 });
6004 }
6005